aboutsummaryrefslogtreecommitdiffstats
path: root/sandbox/main.c
blob: 44b1742e14414c1c7d0072f28427201dadacd82e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#include "sandbox.h"

/*
 * CLOCK_MONOTONIC timestamp used as the start of the elapsed-time measurement.
 * It stays all-zero until main() records it when the read end of child_fd
 * reports a hangup; finish() treats the all-zero value as "not set".
 */
static struct timespec start_timespec = { 0 };

/*
 * Emit the final result record on stderr and exit the supervisor.
 *
 * The record is three native `int`s written as raw bytes, in this order:
 * `reason`, `status`, and the elapsed time in milliseconds between
 * start_timespec and now (CLOCK_MONOTONIC). When `fmt` is non-NULL, a
 * printf-style message built from `fmt` and the variadic arguments is
 * appended after the record.
 *
 * The process always exits with status 0; the outcome is carried by the
 * record itself. Calls bug() if start_timespec is still all-zero or if the
 * clock cannot be read.
 */
static noreturn void finish(enum poe_exit_reason reason, int status, const char *fmt, ...)
{
	if (start_timespec.tv_sec == 0 && start_timespec.tv_nsec == 0)
		bug("start_timespec not set?");

	struct timespec now;
	if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
		bug("clock_gettime failed");

	/* whole seconds scaled to ms, plus the (possibly negative) ns part scaled to ms */
	int elapsed_ms = (now.tv_sec - start_timespec.tv_sec) * 1000 +
		(now.tv_nsec - start_timespec.tv_nsec) / 1000 / 1000;

	int record[3];
	record[0] = reason;
	record[1] = status;
	record[2] = elapsed_ms;
	fwrite(record, sizeof(int), sizeof(record) / sizeof(int), stderr);

	if (fmt != NULL) {
		va_list ap;
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}

	exit(0);
}

static void handle_stdout(int fd, int orig_fd)
{
	assert(PIPE_BUF % 4 == 0);
	uint32_t buf[PIPE_BUF / 4 + 2];
	ssize_t n = read(fd, (char *)(buf + 2), PIPE_BUF);
	if (n < 0)
		bug("read from stdout/err pipe");
	buf[0] = (uint32_t)orig_fd;
	buf[1] = (uint32_t)n;
	if (write(STDOUT_FILENO, buf, n + 8) < 0)
		bug("write to stdout failed");
}

/*
 * React to one signal read from the supervisor's signalfd.
 *
 * mpid: pid of the main sandboxed child; also used as the process-group id
 *       passed to waitpid().
 * si:   the siginfo record read from the signalfd.
 *
 * SIGINT / SIGTERM / SIGHUP end the run through finish(POE_TIMEDOUT, ...).
 * Any signal other than SIGCHLD after that is reported through bug().
 *
 * For SIGCHLD, every pending wait status is collected without blocking:
 *   - main child exited:  finish(POE_SUCCESS, <exit status>)
 *   - main child killed:  finish(POE_SIGNALED, ...) naming the signal
 *   - seccomp ptrace event on any tracee: finish(POE_SIGNALED, ...) naming
 *     the syscall read from the tracee's ORIG_RAX slot
 *   - clone/fork/vfork ptrace event: tracee resumed with no signal
 *   - any other stop: tracee resumed, passing the stop signal along
 * Exit or kill notifications for processes other than `mpid` are ignored.
 */
static void handle_signal(pid_t mpid, struct signalfd_siginfo *si)
{
	if (si->ssi_signo == SIGINT || si->ssi_signo == SIGTERM ||
			si->ssi_signo == SIGHUP)
		finish(POE_TIMEDOUT, -1, "Supervisor terminated");
	if (si->ssi_signo != SIGCHLD)
		/* ssi_signo is a uint32_t; cast so the argument matches %d */
		bug("unknown signal %d", (int)si->ssi_signo);

	int status;
	pid_t spid;
	while ((spid = waitpid(-mpid, &status, WNOHANG | __WALL)) > 0) {
		if (spid == mpid && WIFEXITED(status)) {
			finish(POE_SUCCESS, WEXITSTATUS(status), NULL);
		} else if (spid == mpid && WIFSIGNALED(status)) {
			int sig = WTERMSIG(status);
			finish(POE_SIGNALED, -1, "Program terminated with signal %d (%s)", sig, strsignal(sig));
		} else if (WIFSTOPPED(status)) {
			/* for ptrace event stops the event code sits above the low 16 status bits */
			switch (status >> 16 & 0xff) {
			case PTRACE_EVENT_SECCOMP: {
				/* PEEKUSER returns the word itself, so errors are only visible via errno */
				errno = 0;
				int syscalln = (int)ptrace(PTRACE_PEEKUSER, spid,
						(void *)(sizeof(long) * ORIG_RAX), NULL);
				if (errno)
					bug("ptrace(PTRACE_PEEKUSER) failed");
				/* NOTE(review): assuming the resolver may return NULL for an unknown number — confirm */
				const char *name = poe_seccomp_syscall_resolve(syscalln);
				finish(POE_SIGNALED, -1, "System call %s is blocked", name ? name : "(unknown)");
				break;
			}
			case PTRACE_EVENT_CLONE:
			case PTRACE_EVENT_FORK:
			case PTRACE_EVENT_VFORK:
				ptrace(PTRACE_CONT, spid, NULL, NULL);
				break;
			default: {
				/* ptrace() is variadic: pass the signal as a pointer-sized value */
				int stopsig = WSTOPSIG(status);
				ptrace(PTRACE_CONT, spid, NULL, (void *)(long)stopsig);
				break;
			}
			}
		}
	}
	if (spid < 0) {
		if (errno == ECHILD)
			bug("child dies too early (before raising SIGSTOP)");
		else
			bug("waitpid failed");
	}
}

/*
 * Supervisor entry point.
 *
 * usage: runner basedir overlaydir sourcefile cmdl..
 *
 * Steps, in order:
 *   1. Set up the playground from argv[1]/argv[2] and the command line from
 *      argv[4..] with argv[3] as the source file.
 *   2. Create three packet-mode (O_DIRECT) pipes: child stdout, child stderr,
 *      and a close-on-exec pipe for errors the child hits during setup.
 *   3. Initialise the cgroup, then clone the child into new IPC, mount, PID,
 *      UTS and network namespaces; the child runs poe_child_do().
 *   4. In the parent: block SIGCHLD/SIGINT/SIGTERM/SIGHUP and receive them
 *      through a signalfd, arm a one-shot POE_TIME_LIMIT-second timerfd,
 *      register everything with epoll, seize the child with ptrace and add
 *      it to the cgroup.
 *   5. Loop on epoll forever. The process leaves only through finish(),
 *      die() or bug().
 */
int main(int argc, char *argv[])
{
	if (argc < 5)
		die("usage: runner basedir overlaydir sourcefile cmdl..");

	struct playground *pg = poe_playground_init(argv[1], argv[2]);
	if (!pg)
		die("playground init failed");
	if (poe_playground_init_command_line(pg, argv + 4, argv[3]))
		die("copy program failed");

	int stdout_fd[2], stderr_fd[2], child_fd[2];
	if (pipe2(stdout_fd, O_DIRECT))
		bug("pipe2 failed");
	if (pipe2(stderr_fd, O_DIRECT))
		bug("pipe2 failed");
	/* O_CLOEXEC: the child's write end closes on exec, which the parent sees as a hangup */
	if (pipe2(child_fd, O_DIRECT | O_CLOEXEC))
		bug("pipe2 failed");

	// init cgroup: create root hierarchy and setup controllers
	if (poe_cgroup_init())
		die("failed to init cgroup");

	// TODO: CLONE_NEWUSER
	pid_t pid = (pid_t)syscall(SYS_clone, SIGCHLD | CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET, 0);
	if (pid < 0)
		bug("clone failed");
	if (!pid) {
		poe_child_do(pg, stdout_fd, stderr_fd, child_fd);
		bug("unreachable");
	}

	/* parent keeps only the read ends, so a hangup reflects the child side closing */
	if (close(stdout_fd[1]) || close(stderr_fd[1]) || close(child_fd[1]))
		bug("close child write pipe failed");

	int epoll_fd = epoll_create1(0);
	if (epoll_fd < 0)
		bug("epoll_create1 failed");

	/* block the signals so they are only delivered through the signalfd */
	sigset_t mask;
	sigemptyset(&mask);
	sigaddset(&mask, SIGCHLD);
	sigaddset(&mask, SIGINT);
	sigaddset(&mask, SIGTERM);
	sigaddset(&mask, SIGHUP);
	if (sigprocmask(SIG_BLOCK, &mask, NULL))
		bug("sigprocmask failed");
	int signal_fd = signalfd(-1, &mask, 0);
	if (signal_fd < 0)
		bug("signalfd failed");

	/* one-shot timer: it_interval is left zero */
	int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0);
	if (timer_fd < 0)
		bug("timerfd_create failed");
	if (timerfd_settime(timer_fd, 0, &(struct itimerspec) { .it_value.tv_sec = POE_TIME_LIMIT }, NULL))
		bug("timerfd_settime failed");

#define ADD(fd_) do { \
		if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, (fd_), &(struct epoll_event) { .data.fd = (fd_), .events = EPOLLRDHUP|EPOLLIN })) \
			bug("EPOLL_CTL_ADD failed"); \
	} while (0)
	ADD(signal_fd);
	ADD(timer_fd);
	ADD(child_fd[0]);
	ADD(stdout_fd[0]);
	ADD(stderr_fd[0]);
#undef ADD

	if (ptrace(PTRACE_SEIZE, pid, NULL,
				PTRACE_O_TRACECLONE |
				PTRACE_O_TRACEFORK |
				PTRACE_O_TRACESECCOMP |
				PTRACE_O_TRACEVFORK))
		bug("ptrace failed");

	if (poe_cgroup_add(pid))
		die("failed cgroup add");

	/*
	 * NOTE(review): events are handled in array order, so a SIGCHLD event can
	 * reach finish() before pipe events later in the same batch are
	 * forwarded — confirm whether losing that output is acceptable.
	 */
	while (true) {
		struct epoll_event events[10];
		int n = epoll_wait(epoll_fd, events, sizeof(events) / sizeof(events[0]), -1);
		if (n < 0)
			bug("epoll_wait failed");

		for (int i = 0; i < n; i++) {
			struct epoll_event *ev = &events[i];
			bool readable = (ev->events & EPOLLIN) != 0;

			if (readable) {
				if (ev->data.fd == stdout_fd[0]) {
					handle_stdout(ev->data.fd, STDOUT_FILENO);
				} else if (ev->data.fd == stderr_fd[0]) {
					handle_stdout(ev->data.fd, STDERR_FILENO);
				} else if (ev->data.fd == signal_fd) {
					struct signalfd_siginfo si;
					if (sizeof(si) != read(signal_fd, &si, sizeof(si)))
						die("partial read signalfd");
					handle_signal(pid, &si);
				} else if (ev->data.fd == timer_fd) {
					finish(POE_TIMEDOUT, -1, NULL);
				} else if (ev->data.fd == child_fd[0]) {
					char buf[PIPE_BUF];
					ssize_t nx = read(child_fd[0], buf, sizeof(buf));
					if (nx > 0) // TODO
						die("child err: %s", strndupa(buf, nx));
				}
			}

			/*
			 * Close only once nothing is left to read. Each read takes a
			 * single packet, so a pipe reporting EPOLLIN together with
			 * EPOLLHUP may still hold more; epoll is level-triggered and
			 * reports the fd again until it is drained.
			 */
			if (!readable && (ev->events & (EPOLLERR | EPOLLHUP | EPOLLRDHUP))) {
				// fd closed
				close(ev->data.fd);
				if (ev->data.fd == child_fd[0]) {
					// exec succeeded
					if (clock_gettime(CLOCK_MONOTONIC, &start_timespec))
						bug("clock_gettime failed");
				}
			}
		}
	}

	bug("unreachable");
}