aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kazuki Yamaguchi <k@rhe.jp> 2015-12-31 23:31:08 +0900
committer Kazuki Yamaguchi <k@rhe.jp> 2015-12-31 23:31:08 +0900
commit 49f957d0d4d7619ba4958a846f453041f73f83f1 (patch)
tree 2c439a1c1dd5354277ba912db96cfadbbc1316f4
parent 0785c946b036afd28b0d189472d4546163f50e2b (diff)
download poe-49f957d0d4d7619ba4958a846f453041f73f83f1.tar.gz
add initial sandbox implementation: only seccomp and overlayfs
-rw-r--r-- sandbox/.gitignore 3
-rw-r--r-- sandbox/Makefile 8
-rw-r--r-- sandbox/playground.c 51
-rw-r--r-- sandbox/sandbox.c 183
-rw-r--r-- sandbox/sandbox.h 52
-rw-r--r-- sandbox/seccomp.c 100
6 files changed, 397 insertions, 0 deletions
diff --git a/sandbox/.gitignore b/sandbox/.gitignore
new file mode 100644
index 0000000..7fe4e0c
--- /dev/null
+++ b/sandbox/.gitignore
@@ -0,0 +1,3 @@
+/testbox
+/sandbox
+/*.o
diff --git a/sandbox/Makefile b/sandbox/Makefile
new file mode 100644
index 0000000..5141daa
--- /dev/null
+++ b/sandbox/Makefile
@@ -0,0 +1,8 @@
+# Hardened debug build. _FORTIFY_SOURCE is only honoured when the compiler
+# optimizes, so use -Og (debugger-friendly optimization) instead of -O0;
+# with -O0 glibc disables the checks and warns on every compile.
+CFLAGS += -std=c11 -D_GNU_SOURCE -D_FORTIFY_SOURCE=2 -ggdb3 -Og -fPIE -fstack-protector-all \
+ -Wall -Wextra -Wshadow -Wmissing-declarations -Wpointer-arith
+LDLIBS = -lseccomp
+LDFLAGS += -pie -Wl,-z,relro,-z,now
+
+# Both targets rely on make's built-in "link from .c" rule.
+#all: sandbox safe_runner
+sandbox: sandbox.c seccomp.c playground.c
+safe_runner: safe_runner.c
diff --git a/sandbox/playground.c b/sandbox/playground.c
new file mode 100644
index 0000000..3ea009e
--- /dev/null
+++ b/sandbox/playground.c
@@ -0,0 +1,51 @@
+#include "sandbox.h"
+
+/*
+ * Paths of the overlayfs directories owned by this module. Each one is a heap
+ * copy of a mkdtemp() template, or NULL while it has not been allocated.
+ */
+static char *upperdir = NULL;
+static char *workdir = NULL;
+static char *mergeddir = NULL;
+
+/*
+ * Build the sandbox root: create work/upper/merged directories under
+ * POE_TEMPORARY_BASE and mount an overlay on the merged directory with
+ * `env` stacked on top of `base` as the lower layers.
+ *
+ * Returns the path of the merged directory. The string is owned by this
+ * module (it is freed by poe_destroy_playground()), so callers must not free
+ * it. Any failure is reported through ERROR(), which does not return.
+ */
+char *
+poe_init_playground(const char *base, const char *env)
+{
+    // Create the base directory unconditionally and tolerate EEXIST instead of
+    // stat()-then-mkdir(), which can fail when two instances start together.
+    if (mkdir(POE_TEMPORARY_BASE, 0755) == -1 && errno != EEXIST) {
+        ERROR("failed to create temporary base");
+    }
+
+    // mkdtemp() creates directories with mode 0700; widen them to 0755.
+    workdir = strdup(POE_WORKDIR_TEMPLATE);
+    if (!workdir || !mkdtemp(workdir)) ERROR("failed to create workdir");
+    if (chmod(workdir, 0755) == -1) ERROR("failed to chmod workdir");
+    upperdir = strdup(POE_UPPERDIR_TEMPLATE);
+    if (!upperdir || !mkdtemp(upperdir)) ERROR("failed to create upperdir");
+    if (chmod(upperdir, 0755) == -1) ERROR("failed to chmod upperdir");
+    mergeddir = strdup(POE_MERGEDDIR_TEMPLATE);
+    if (!mergeddir || !mkdtemp(mergeddir)) ERROR("failed to create mergeddir");
+    if (chmod(mergeddir, 0755) == -1) ERROR("failed to chmod mergeddir");
+
+    char *opts = NULL;
+    if (asprintf(&opts, "lowerdir=%s:%s,upperdir=%s,workdir=%s", env, base, upperdir, workdir) == -1)
+        ERROR("asprintf() failed");
+    if (mount(NULL, mergeddir, "overlay", MS_NOSUID, opts) == -1)
+        ERROR("mount overlay failed");
+    // The option string is only needed for the mount() call itself.
+    free(opts);
+
+    return mergeddir;
+}
+
+/*
+ * Tear down whatever poe_init_playground() managed to set up: unmount the
+ * overlay, remove the temporary directories and release the path strings.
+ *
+ * Everything here is best effort; failures are reported on stderr and never
+ * abort, because this also runs from error paths. It is safe to call when
+ * initialisation never happened or only partly completed, and calling it
+ * twice is harmless because the pointers are reset to NULL.
+ */
+void
+poe_destroy_playground(void)
+{
+    struct stat s;
+    if (mergeddir) {
+        // The mount may not exist if initialisation failed early, so the
+        // result of umount() is deliberately ignored.
+        umount(mergeddir);
+        // ENOENT means mkdtemp() never created the directory.
+        if (rmdir(mergeddir) == -1 && errno != ENOENT) fprintf(stderr, "failed remove mergeddir");
+        free(mergeddir);
+        mergeddir = NULL;
+    }
+    if (workdir) {
+        if (stat(workdir, &s) != -1) {
+            // overlayfs creates a "work" subdirectory inside workdir; remove
+            // it first, otherwise rmdir(workdir) fails with ENOTEMPTY.
+            char *inner = NULL;
+            if (asprintf(&inner, "%s/work", workdir) != -1) {
+                rmdir(inner);
+                free(inner);
+            }
+            if (rmdir(workdir) == -1) fprintf(stderr, "failed remove workdir");
+        }
+        free(workdir);
+        workdir = NULL;
+    }
+    if (upperdir) {
+        // rmdir() only succeeds on an empty directory, so files the sandboxed
+        // program left in the upper layer are never deleted here.
+        if (stat(upperdir, &s) != -1 && rmdir(upperdir) == -1) fprintf(stderr, "failed remove upperdir");
+        free(upperdir);
+        upperdir = NULL;
+    }
+}
diff --git a/sandbox/sandbox.c b/sandbox/sandbox.c
new file mode 100644
index 0000000..6315102
--- /dev/null
+++ b/sandbox/sandbox.c
@@ -0,0 +1,183 @@
+#include "sandbox.h"
+
+/*
+ * Body of the cloned process: confine itself and exec the user command.
+ *
+ * root: directory that becomes the new filesystem root (bind-mounted, then
+ *       chroot()ed into).
+ * cmd:  NULL-terminated argv of the command; cmd[0] is resolved via PATH.
+ *
+ * Steps, in order: arrange to be killed when the parent dies, set the
+ * hostname, make mounts private, chroot, mount tmpfs on /tmp and on the
+ * home directory of POE_USERNAME, drop to that user, stop itself with SIGSTOP,
+ * then set no_new_privs, install the seccomp filter and exec. Does not return:
+ * every failure ends in ERROR().
+ */
+static void
+child(const char *root, char *cmd[])
+{
+    // Ask the kernel directly for the PID; the process is expected to be PID 1.
+    pid_t pid = (pid_t)syscall(SYS_getpid);
+    assert(pid == 1);
+    //TODO: check FDs
+    // die if parent dies
+    if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) == -1) ERROR("prctl(PR_SET_PDEATHSIG, SIGKILL) failed");
+
+    if (sethostname(POE_HOSTNAME, strlen(POE_HOSTNAME)) == -1) ERROR("sethostname() failed");
+    if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1) ERROR("mount / failed");
+    if (mount(root, root, "bind", MS_BIND | MS_REC, NULL) == -1) ERROR("bind root failed");
+    if (chroot(root) == -1) ERROR("chroot() failed");
+    // if (mount(NULL, "/proc", "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) == -1) ERROR("mount /proc failed");
+    // if (mount(NULL, "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL) == -1) ERROR("mount /dev failed");
+    // if (mount(NULL, "/dev/shm", "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount /dev/shm failed");
+    if (mount(NULL, "/tmp", "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount /tmp failed");
+
+    // Looked up after chroot(), so the account comes from the sandbox root.
+    struct passwd *pw = getpwnam(POE_USERNAME);
+    if (!pw) ERROR("getpwnam() failed");
+
+    if (mount(NULL, pw->pw_dir, "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount home failed");
+    if (chdir(pw->pw_dir) == -1) ERROR("chdir() failed");
+    if (setsid() == -1) ERROR("setsid() failed");
+    // Groups first, then gid, then uid: once the uid is dropped the other
+    // calls would no longer be permitted.
+    if (initgroups(POE_USERNAME, pw->pw_gid) == -1) ERROR("initgroups() failed");
+    if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) == -1) ERROR("setresgid() failed");
+    if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) == -1) ERROR("setresuid() failed");
+
+    // Minimal environment for the command; the last slot stays NULL as terminator.
+    char path[] = "PATH=/usr/bin";
+    char *env[] = {path, NULL, NULL, NULL, NULL};
+    if (asprintf(env + 1, "HOME=%s", pw->pw_dir) == -1 ||
+        asprintf(env + 2, "USER=%s", POE_USERNAME) == -1 ||
+        asprintf(env + 3, "LOGNAME=%s", POE_USERNAME) == -1) {
+        ERROR("asprintf() failed");
+    }
+
+    // wait parent
+    // NOTE(review): nothing visible here guarantees the parent has attached
+    // before this SIGSTOP is raised -- confirm the ordering against parent().
+    if (kill(pid, SIGSTOP) == -1) ERROR("kill(self, SIGSTOP) failed");
+
+    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) ERROR("prctl(PR_SET_NO_NEW_PRIVS, 1) failed");
+    poe_init_seccomp(SCMP_ACT_TRACE(0));
+
+    // execvpe() only returns on failure.
+    execvpe(cmd[0], cmd, env);
+    ERROR("execvpe() failed: %s\n", strerror(errno));
+}
+
+/*
+ * Read syscall argument number `i` of the stopped tracee `pid` from its saved
+ * registers. `i` is 1-based and indexes the six-entry register table below;
+ * it is not range-checked. Returns whatever PTRACE_PEEKUSER returns.
+ */
+static inline long
+get_arg(pid_t pid, int i)
+{
+    static const int arg_regs[] = {RDI, RSI, RDX, R10, R8, R9};
+    const int reg = arg_regs[i - 1];
+    return ptrace(PTRACE_PEEKUSER, pid, sizeof(long) * (size_t)reg);
+}
+
+/*
+ * Copy `len` bytes starting at address `addr` in tracee `pid` into `buf`.
+ * Reads whole, word-aligned words so a read never straddles a page boundary.
+ * Returns 0 on success, -1 if any PTRACE_PEEKDATA fails (errno is set).
+ */
+static int
+peek_bytes(pid_t pid, unsigned long addr, char *buf, size_t len)
+{
+    size_t done = 0;
+    while (done < len) {
+        unsigned long cur = addr + done;
+        unsigned long aligned = cur & ~(unsigned long)(sizeof(long) - 1);
+        size_t skip = (size_t)(cur - aligned);
+        // -1 is also a valid word, so errors must be detected through errno.
+        errno = 0;
+        long word = ptrace(PTRACE_PEEKDATA, pid, (void *)aligned, NULL);
+        if (word == -1 && errno) return -1;
+        size_t take = sizeof(long) - skip;
+        if (take > len - done) take = len - done;
+        memcpy(buf + done, (const char *)&word + skip, take);
+        done += take;
+    }
+    return 0;
+}
+
+/* write() the whole buffer, retrying on short writes and EINTR. 0 or -1. */
+static int
+write_all(int fd, const void *data, size_t len)
+{
+    const char *p = data;
+    while (len > 0) {
+        ssize_t w = write(fd, p, len);
+        if (w == -1) {
+            if (errno == EINTR) continue;
+            return -1;
+        }
+        p += w;
+        len -= (size_t)w;
+    }
+    return 0;
+}
+
+/*
+ * Relay a tracee write() to our stdout as frames of
+ *   <1 byte fd> <int byte count> <data>,
+ * at most one chunk buffer per frame. Returns the value to hand back to the
+ * tracee as the syscall result: the number of bytes relayed, or a negative
+ * errno if nothing could be relayed.
+ */
+static long
+relay_write(pid_t pid, char fd, unsigned long addr, unsigned long len)
+{
+    // Same upper bound Linux applies to a single write().
+    static const unsigned long max_len = 0x7ffff000UL;
+    char buf[4096];
+    unsigned long done = 0;
+
+    if (len > max_len) len = max_len;
+    // do/while so that a zero-length write still produces one empty frame.
+    do {
+        unsigned long left = len - done;
+        int n = (int)(left < sizeof(buf) ? left : sizeof(buf));
+        if (n > 0 && peek_bytes(pid, addr + done, buf, (size_t)n) == -1)
+            return done ? (long)done : -EFAULT;
+        if (write_all(1, &fd, sizeof(fd)) == -1 ||
+            write_all(1, &n, sizeof(n)) == -1 ||
+            write_all(1, buf, (size_t)n) == -1)
+            return done ? (long)done : -EIO;
+        done += (unsigned long)n;
+    } while (done < len);
+    return (long)done;
+}
+
+/*
+ * Decide what to do with a syscall that stopped tracee `pid` at a seccomp
+ * trace event.
+ *
+ * pid:      the stopped tracee.
+ * syscalln: syscall number read from the tracee's ORIG_RAX.
+ *
+ * Returns POE_HANDLED when the call was emulated here (write() to fd 1 or 2:
+ * the data is relayed to our stdout and the result is stored in the tracee's
+ * RAX), POE_ALLOWED for write() to any other fd, and POE_PROHIBITED for every
+ * other syscall.
+ */
+static enum poe_handler_result
+handle_syscall(pid_t pid, int syscalln)
+{
+    long arg1;
+    switch (syscalln) {
+    case SYS_write:
+        arg1 = get_arg(pid, 1);
+        if (arg1 == 1 || arg1 == 2) {
+            unsigned long addr = (unsigned long)get_arg(pid, 2);
+            unsigned long len = (unsigned long)get_arg(pid, 3);
+            long ret = relay_write(pid, (char)arg1, addr, len);
+            ptrace(PTRACE_POKEUSER, pid, sizeof(long) * RAX, ret);
+            return POE_HANDLED;
+        }
+        break;
+    default:
+        if (DEBUG) {
+            char *rule = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, syscalln);
+            if (!rule) ERROR("seccomp_syscall_resolve_num_arch() failed");
+            fprintf(stderr, "syscall: %s\n", rule);
+            free(rule);
+        }
+        return POE_PROHIBITED;
+    }
+    return POE_ALLOWED;
+}
+
+/*
+ * Report the outcome and terminate: writes `status` and then `signal` as raw
+ * 32-bit values to file descriptor 2, destroys the playground and exits with
+ * code 0. Never returns.
+ */
+static void
+result(uint32_t status, uint32_t signal)
+{
+    const uint32_t report[2] = {status, signal};
+
+    write(2, (const void *)&report[0], sizeof(report[0]));
+    write(2, (const void *)&report[1], sizeof(report[1]));
+
+    poe_destroy_playground();
+    exit(0);
+}
+
+/*
+ * Tracer loop run by the original process.
+ *
+ * mpid:   PID of the cloned child (the sandbox's first process).
+ * sig_fd: signalfd on which SIGCHLD is delivered.
+ *
+ * Attaches to the child with PTRACE_SEIZE, then for every SIGCHLD drains all
+ * pending wait statuses of the child's process group. Exits through result()
+ * when the main child exits or any waited process is killed by a signal.
+ * Seccomp trace stops are dispatched to handle_syscall(); a prohibited
+ * syscall kills the offender and ends the run with SIGSYS. Never returns.
+ */
+static void
+parent(const pid_t mpid, int sig_fd)
+{
+    long trace_flags = PTRACE_O_TRACECLONE | PTRACE_O_TRACEFORK | PTRACE_O_TRACESECCOMP | PTRACE_O_TRACEVFORK;
+    if (ptrace(PTRACE_SEIZE, mpid, NULL, trace_flags) == -1) ERROR("ptrace(PTRACE_SEIZE, ) failed");
+
+    while (true) {
+        struct signalfd_siginfo si;
+        ssize_t bytes_r = read(sig_fd, &si, sizeof(si));
+        if (bytes_r == -1) ERROR("read(sig_fd, ) failed");
+        if (si.ssi_signo != SIGCHLD) ERROR("parent: unexpected signal");
+
+        // One SIGCHLD may stand for several state changes: keep polling
+        // until waitpid() reports that nothing more is pending.
+        while (true) {
+            int status;
+            pid_t spid = waitpid(-mpid, &status, WNOHANG | __WALL);
+            if (spid == -1) ERROR("waitpid() failed");
+            if (!spid) break;
+
+            if (WIFEXITED(status) && spid == mpid) {
+                result(WEXITSTATUS(status), 0);
+            } else if (WIFSIGNALED(status)) {
+                result(0, WTERMSIG(status));
+            } else if (WIFSTOPPED(status)) {
+                // The ptrace event code is carried above the low 16 status bits.
+                int e = status >> 16 & 0xff;
+                switch (e) {
+                case PTRACE_EVENT_SECCOMP: {
+                    // PEEKUSER may legitimately return -1; check errno instead.
+                    errno = 0;
+                    int syscalln = (int)ptrace(PTRACE_PEEKUSER, spid, sizeof(long) * ORIG_RAX);
+                    if (errno) ERROR("ptrace(PTRACE_PEEKUSER, ) failed");
+                    enum poe_handler_result ret = handle_syscall(spid, syscalln);
+                    if (ret == POE_HANDLED) {
+                        // cancel syscall
+                        ptrace(PTRACE_POKEUSER, spid, sizeof(long) * ORIG_RAX, -1);
+                    } else if (ret == POE_PROHIBITED) {
+                        // implicitly prohibited syscall: always kill the
+                        // offender; DEBUG only controls the diagnostic, so a
+                        // non-debug build can never resume a rejected call.
+                        kill(spid, SIGKILL);
+                        if (DEBUG) {
+                            char *rule = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, syscalln);
+                            if (rule) fprintf(stderr, "#### prohibited syscall: %s ####", rule);
+                            free(rule);
+                        }
+                        result(0, SIGSYS);
+                    }
+                    ptrace(PTRACE_CONT, spid, 0, 0);
+                    break;
+                }
+                case PTRACE_EVENT_CLONE:
+                case PTRACE_EVENT_FORK:
+                case PTRACE_EVENT_VFORK:
+                    ptrace(PTRACE_CONT, spid, 0, 0);
+                    break;
+                default:
+                    // Any other stop: resume and pass the stop signal on.
+                    ptrace(PTRACE_CONT, spid, 0, WSTOPSIG(status));
+                    break;
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Entry point: `baseroot envroot cmd...`.
+ *
+ * Builds the overlay playground from the two roots, blocks SIGCHLD and
+ * receives it through a signalfd, then clones a process into new IPC, mount,
+ * PID, UTS and network namespaces. The clone runs child() with the remaining
+ * arguments as the command; the original process runs parent() as tracer.
+ */
+int
+main(int argc, char *argv[])
+{
+    if (argc < 4) {
+        ERROR("usage: %s baseroot envroot cmd...", program_invocation_short_name);
+    }
+
+    const char *root = poe_init_playground(argv[1], argv[2]);
+
+    // Route SIGCHLD through a signalfd; keep the old mask for the child.
+    sigset_t chld_only, saved;
+    sigemptyset(&chld_only);
+    sigaddset(&chld_only, SIGCHLD);
+    sigprocmask(SIG_BLOCK, &chld_only, &saved);
+    int sig_fd = signalfd(-1, &chld_only, SFD_CLOEXEC);
+    if (sig_fd == -1) ERROR("signalfd() failed");
+
+    // TODO: CLONE_NEWUSER? require CONFIG_USER_NS=y
+    pid_t pid = (pid_t)syscall(SYS_clone, SIGCHLD | CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET, 0);
+    switch (pid) {
+    case -1:
+        ERROR("clone() failed");
+        break;
+    case 0:
+        // In the new process: undo the SIGCHLD block before going on.
+        sigprocmask(SIG_SETMASK, &saved, NULL);
+        child(root, argv + 3);
+        break;
+    default:
+        parent(pid, sig_fd);
+        break;
+    }
+
+    ERROR("unreachable");
+}
diff --git a/sandbox/sandbox.h b/sandbox/sandbox.h
new file mode 100644
index 0000000..88a99ca
--- /dev/null
+++ b/sandbox/sandbox.h
@@ -0,0 +1,52 @@
+#ifndef __x86_64__
+# error unsupported
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+#include <sched.h>
+#include <seccomp.h>
+#include <assert.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/ptrace.h>
+#include <sys/signalfd.h>
+#include <sys/reg.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+/* When true, the tracer prints the names of rejected syscalls to stderr. */
+#define DEBUG true
+/* Account the sandboxed command runs as (looked up with getpwnam()). */
+#define POE_USERNAME "nobody"
+/* Hostname set inside the sandbox. */
+#define POE_HOSTNAME "poe-sandbox"
+
+/* Not referenced by the sources in this commit. */
+#define POE_LOWERDIR "/"
+/* Directory holding the per-run overlay directories, and their mkdtemp() templates. */
+#define POE_TEMPORARY_BASE "/tmp/poe"
+#define POE_UPPERDIR_TEMPLATE POE_TEMPORARY_BASE "/upperXXXXXX"
+#define POE_WORKDIR_TEMPLATE POE_TEMPORARY_BASE "/workXXXXXX"
+#define POE_MERGEDDIR_TEMPLATE POE_TEMPORARY_BASE "/mergedXXXXXX"
+
+/*
+ * Fatal error: print a printf-style message to stderr and exit with status 1.
+ * No newline is appended. The playground is destroyed first unless the caller
+ * is PID 1, i.e. the process running inside the new PID namespace, which must
+ * not tear down directories owned by the outer process.
+ */
+#define ERROR(...) do {\
+    fprintf(stderr, __VA_ARGS__);\
+    if (syscall(SYS_getpid) != 1) poe_destroy_playground();\
+    exit(1);\
+} while (false)
+
+/* Verdict of the tracer for a syscall that reached it through seccomp. */
+enum poe_handler_result {
+    POE_PROHIBITED, /* not permitted */
+    POE_HANDLED,    /* emulated by the tracer; the real syscall is cancelled */
+    POE_ALLOWED     /* may proceed unchanged */
+};
+
+/* Install the seccomp filter; `act` is the action for syscalls without a rule. */
+void poe_init_seccomp(uint32_t act);
+
+/*
+ * Create the overlay root from the lower layers `base` and `env`; returns the
+ * merged directory path, which stays owned by the playground module.
+ */
+char *poe_init_playground(const char *base, const char *env);
+/* Unmount and remove what poe_init_playground() created. */
+void poe_destroy_playground(void);
diff --git a/sandbox/seccomp.c b/sandbox/seccomp.c
new file mode 100644
index 0000000..f5d0e37
--- /dev/null
+++ b/sandbox/seccomp.c
@@ -0,0 +1,100 @@
+#include "sandbox.h"
+
+struct syscall_rule { /* one filter entry: a syscall number and the seccomp action applied to it */
+ int syscall;
+ uint32_t action;
+};
+#define RULE(name, action) { SCMP_SYS(name), SCMP_ACT_##action } /* entry from a syscall name and an SCMP_ACT_ suffix */
+static const struct syscall_rule syscall_rules[] = { /* explicit per-syscall actions; anything not listed gets the default passed to poe_init_seccomp() */
+ RULE(ptrace, ERRNO(EPERM)),
+ RULE(prctl, ERRNO(EPERM)),
+ RULE(execve, ALLOW),
+ RULE(clone, ALLOW),
+ RULE(vfork, ALLOW),
+ RULE(wait4, ALLOW),
+ RULE(dup, ALLOW),
+ RULE(dup2, ALLOW),
+ RULE(capget, ALLOW),
+ RULE(kill, ALLOW),
+
+ // safe
+ RULE(futex, ALLOW),
+ RULE(exit, ALLOW),
+ RULE(pipe2, ALLOW),
+ RULE(brk, ALLOW),
+ RULE(mmap, ALLOW),
+ RULE(mprotect, ALLOW),
+ RULE(munmap, ALLOW),
+ RULE(mremap, ALLOW),
+ RULE(madvise, ALLOW),
+ RULE(rt_sigaction, ALLOW),
+ RULE(rt_sigprocmask, ALLOW),
+ RULE(rt_sigreturn, ALLOW),
+ RULE(nanosleep, ALLOW),
+ RULE(getrlimit, ALLOW),
+ RULE(poll, ALLOW),
+ RULE(exit_group, ALLOW),
+ RULE(getpid, ALLOW),
+ RULE(getuid, ALLOW),
+ RULE(getgid, ALLOW),
+ RULE(geteuid, ALLOW),
+ RULE(getegid, ALLOW),
+ RULE(getresuid, ALLOW),
+ RULE(getresgid, ALLOW),
+ RULE(gettimeofday, ALLOW),
+ RULE(clock_gettime, ALLOW),
+ RULE(set_tid_address, ALLOW),
+ RULE(getdents, ALLOW),
+ RULE(arch_prctl, ALLOW),
+ RULE(set_robust_list, ALLOW),
+ RULE(get_robust_list, ALLOW),
+ RULE(sigaltstack, ALLOW),
+ RULE(uname, ALLOW), // dummy? /proc/sys/kernel/*?
+ RULE(getcwd, ALLOW),
+ RULE(getppid, ALLOW),
+ RULE(getpgrp, ALLOW),
+
+ // ???? (author's marker; NOTE(review): classification of this group looks undecided)
+ RULE(socket, ERRNO(ENOSYS)),
+ RULE(utimensat, ALLOW),
+ RULE(futimesat, ALLOW),
+ RULE(getxattr, ALLOW),
+
+ // ??? (author's marker; NOTE(review): classification of this group looks undecided)
+ RULE(fadvise64, ALLOW),
+ RULE(readlink, ALLOW),
+ RULE(open, ALLOW),
+ RULE(openat, ALLOW),
+ RULE(stat, ALLOW),
+ RULE(close, ALLOW),
+ RULE(read, ALLOW),
+ RULE(readv, ALLOW),
+ RULE(pread64, ALLOW),
+ RULE(write, TRACE(1)), // reported to the tracer rather than executed directly
+ RULE(writev, ALLOW),
+ RULE(pwrite64, ALLOW),
+ RULE(lstat, ALLOW),
+ RULE(fstat, ALLOW),
+ RULE(fcntl, ALLOW),
+ RULE(ioctl, ALLOW),
+ RULE(lseek, ALLOW),
+ RULE(access, ALLOW),
+};
+static const int syscall_rules_count = sizeof(syscall_rules) / sizeof(struct syscall_rule); /* number of entries in syscall_rules */
+
+/*
+ * Build and load the seccomp filter for the calling process.
+ *
+ * act: default action, used for every syscall that has no entry in
+ *      syscall_rules.
+ *
+ * Each table entry is added as an unconditional rule (no argument
+ * comparisons). Any libseccomp failure is fatal via ERROR().
+ */
+void
+poe_init_seccomp(uint32_t act)
+{
+    scmp_filter_ctx filter = seccomp_init(act);
+    if (filter == NULL) ERROR("seccomp_init() failed");
+
+    const struct syscall_rule *const end = syscall_rules + syscall_rules_count;
+    for (const struct syscall_rule *r = syscall_rules; r != end; r++) {
+        if (seccomp_rule_add(filter, r->action, r->syscall, 0) < 0) ERROR("seccomp_rule_add() failed");
+    }
+
+    const int load_rc = seccomp_load(filter);
+    if (load_rc < 0) ERROR("seccomp_load() failed: %s", strerror(-load_rc));
+
+    // The filter stays loaded in the kernel; only the builder context is freed.
+    seccomp_release(filter);
+}