service: Start services normally when seccomp is disabled
[project/procd.git] / trace / trace.c
1 /*
2  * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License version 2.1
6  * as published by the Free Software Foundation
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  */
13
14 #define _GNU_SOURCE
#include <fcntl.h>
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <syslog.h>
#include <limits.h>
#include <signal.h>
28
#if !defined(PTRACE_EVENT_STOP)
/* linux/ptrace.h would provide PTRACE_EVENT_STOP, but that header
 * collides with musl's sys/ptrace.h, so supply the value ourselves. */
#define PTRACE_EVENT_STOP 128
#endif
34
35 #include <libubox/ulog.h>
36 #include <libubox/uloop.h>
37 #include <libubox/blobmsg.h>
38 #include <libubox/blobmsg_json.h>
39
40 #include "../syscall-names.h"
41
/* Shorthand for the compiler's builtin offsetof. */
#define _offsetof(a, b) __builtin_offsetof(a,b)
/* Element count of a real array (not valid on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/*
 * reg_syscall_nr is the per-architecture address handed to
 * PTRACE_PEEKUSER / PTRACE_POKEUSER to read or overwrite the syscall
 * number of a stopped tracee. On ARM EABI, reg_retval_nr additionally
 * addresses uregs[0]. Unsupported architectures fail the build.
 */
#if defined(__amd64__)
#define reg_syscall_nr	_offsetof(struct user, regs.orig_rax)
#elif defined(__i386__)
#define reg_syscall_nr	_offsetof(struct user, regs.orig_eax)
#elif defined(__mips)
# if !defined(EF_REG2)
# define EF_REG2	8
# endif
#define reg_syscall_nr	(EF_REG2 / 4)
#elif defined(__arm__)
#include <asm/ptrace.h>		/* for PTRACE_SET_SYSCALL */
#define reg_syscall_nr	_offsetof(struct user, regs.uregs[7])
# if defined(__ARM_EABI__)
# define reg_retval_nr	_offsetof(struct user, regs.uregs[0])
# endif
#else
#error tracing is not supported on this architecture
#endif
63
/*
 * Operating mode of the tool:
 *  UTRACE        - stop the tracee on every syscall and count them
 *  SECCOMP_TRACE - only react to seccomp events and log violations
 */
enum mode {
	UTRACE,
	SECCOMP_TRACE,
};

/* Selected mode; UTRACE unless main() switches it. */
enum mode mode = UTRACE;
68
69 struct tracee {
70         struct uloop_process proc;
71         int in_syscall;
72 };
73
74 static struct tracee tracer;
75 static int syscall_count[SYSCALL_COUNT];
76 static int violation_count;
77 static struct blob_buf b;
78 static int debug;
79 char *json = NULL;
80 int ptrace_restart;
81
82 static void set_syscall(const char *name, int val)
83 {
84         int i;
85
86         for (i = 0; i < SYSCALL_COUNT; i++) {
87                 int sc = syscall_index_to_number(i);
88                 if (syscall_name(sc) && !strcmp(syscall_name(sc), name)) {
89                         syscall_count[i] = val;
90                         return;
91                 }
92         }
93 }
94
/* A syscall number paired with the number of times it was observed. */
struct syscall {
	int syscall;
	int count;
};

/*
 * qsort() comparator that orders struct syscall entries by descending
 * count.
 *
 * Returns a negative value if @a has the larger count, a positive value
 * if @b has the larger count, and 0 if both counts are equal. The result
 * is computed by comparison rather than subtraction so that it cannot
 * overflow, and the const qualifier of the arguments is preserved.
 */
static int cmp_count(const void *a, const void *b)
{
	const struct syscall *lhs = a;
	const struct syscall *rhs = b;

	return (rhs->count > lhs->count) - (rhs->count < lhs->count);
}
104
105 static void print_syscalls(int policy, const char *json)
106 {
107         void *c;
108         int i;
109
110         if (mode == UTRACE) {
111                 set_syscall("rt_sigaction", 1);
112                 set_syscall("sigreturn", 1);
113                 set_syscall("rt_sigreturn", 1);
114                 set_syscall("exit_group", 1);
115                 set_syscall("exit", 1);
116         }
117
118         struct syscall sorted[SYSCALL_COUNT];
119
120         for (i = 0; i < SYSCALL_COUNT; i++) {
121                 sorted[i].syscall = syscall_index_to_number(i);
122                 sorted[i].count = syscall_count[i];
123         }
124
125         qsort(sorted, SYSCALL_COUNT, sizeof(sorted[0]), cmp_count);
126
127         blob_buf_init(&b, 0);
128         c = blobmsg_open_array(&b, "whitelist");
129
130         for (i = 0; i < SYSCALL_COUNT; i++) {
131                 int sc = sorted[i].syscall;
132                 if (!sorted[i].count)
133                         break;
134                 if (syscall_name(sc)) {
135                         if (debug)
136                                 printf("syscall %d (%s) was called %d times\n",
137                                        sc, syscall_name(sc), sorted[i].count);
138                         blobmsg_add_string(&b, NULL, syscall_name(sc));
139                 } else {
140                         ULOG_ERR("no name found for syscall(%d)\n", sc);
141                 }
142         }
143         blobmsg_close_array(&b, c);
144         blobmsg_add_u32(&b, "policy", policy);
145         if (json) {
146                 FILE *fp = fopen(json, "w");
147                 if (fp) {
148                         fprintf(fp, "%s", blobmsg_format_json_indent(b.head, true, 0));
149                         fclose(fp);
150                         ULOG_INFO("saving syscall trace to %s\n", json);
151                 } else {
152                         ULOG_ERR("failed to open %s\n", json);
153                 }
154         } else {
155                 printf("%s\n",
156                         blobmsg_format_json_indent(b.head, true, 0));
157         }
158
159 }
160
161 static void report_seccomp_vialation(pid_t pid, unsigned syscall)
162 {
163         char buf[200];
164         snprintf(buf, sizeof(buf), "/proc/%d/cmdline", pid);
165         int f = open(buf, O_RDONLY);
166         int r = read(f, buf, sizeof(buf) - 1);
167         if (r >= 0)
168                 buf[r] = 0;
169         else
170                 strcpy(buf, "unknown?");
171         close(f);
172
173         if (violation_count < INT_MAX)
174                 violation_count++;
175         int i = syscall_index(syscall);
176         if (i >= 0) {
177                 syscall_count[i]++;
178                 ULOG_ERR("%s[%u] tried to call non-whitelisted syscall: %s (see %s)\n",
179                          buf, pid,  syscall_name(syscall), json);
180         } else {
181                 ULOG_ERR("%s[%u] tried to call non-whitelisted syscall: %d (see %s)\n",
182                          buf, pid,  syscall, json);
183         }
184 }
185
186 static void tracer_cb(struct uloop_process *c, int ret)
187 {
188         struct tracee *tracee = container_of(c, struct tracee, proc);
189         int inject_signal = 0;
190
191         /* We explicitely check for events in upper 16 bits, because
192          * musl (as opposed to glibc) does not report
193          * PTRACE_EVENT_STOP as WIFSTOPPED */
194         if (WIFSTOPPED(ret) || (ret >> 16)) {
195                 if (WSTOPSIG(ret) & 0x80) {
196                         if (!tracee->in_syscall) {
197                                 int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
198                                 int i = syscall_index(syscall);
199                                 if (i >= 0) {
200                                         syscall_count[i]++;
201                                         if (debug)
202                                                 fprintf(stderr, "%s()\n", syscall_name(syscall));
203                                 } else if (debug) {
204                                         fprintf(stderr, "syscal(%d)\n", syscall);
205                                 }
206                         }
207                         tracee->in_syscall = !tracee->in_syscall;
208                 } else if ((ret >> 8) == (SIGTRAP | (PTRACE_EVENT_FORK << 8)) ||
209                            (ret >> 8) == (SIGTRAP | (PTRACE_EVENT_VFORK << 8)) ||
210                            (ret >> 8) == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
211                         struct tracee *child = calloc(1, sizeof(struct tracee));
212
213                         ptrace(PTRACE_GETEVENTMSG, c->pid, 0, &child->proc.pid);
214                         child->proc.cb = tracer_cb;
215                         ptrace(ptrace_restart, child->proc.pid, 0, 0);
216                         uloop_process_add(&child->proc);
217                         if (debug)
218                                 fprintf(stderr, "Tracing new child %d\n", child->proc.pid);
219                 } else if ((ret >> 16) == PTRACE_EVENT_STOP) {
220                         /* Nothing special to do here */
221                 } else if ((ret >> 8) == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
222                         int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
223 #if defined(__arm__)
224                         ptrace(PTRACE_SET_SYSCALL, c->pid, 0, -1);
225                         ptrace(PTRACE_POKEUSER, c->pid, reg_retval_nr, -ENOSYS);
226 #else
227                         ptrace(PTRACE_POKEUSER, c->pid, reg_syscall_nr, -1);
228 #endif
229                         report_seccomp_vialation(c->pid, syscall);
230                 } else {
231                         inject_signal = WSTOPSIG(ret);
232                         if (debug)
233                                 fprintf(stderr, "Injecting signal %d into pid %d\n",
234                                         inject_signal, tracee->proc.pid);
235                 }
236         } else if (WIFEXITED(ret) || (WIFSIGNALED(ret) && WTERMSIG(ret))) {
237                 if (tracee == &tracer) {
238                         uloop_end(); /* Main process exit */
239                 } else {
240                         if (debug)
241                                 fprintf(stderr, "Child %d exited\n", tracee->proc.pid);
242                         free(tracee);
243                 }
244                 return;
245         }
246
247         ptrace(ptrace_restart, c->pid, 0, inject_signal);
248         uloop_process_add(c);
249 }
250
251 static void sigterm_handler(int signum)
252 {
253         /* When we receive SIGTERM, we forward it to the tracee. After
254          * the tracee exits, trace_cb() will be called and make us
255          * exit too. */
256         kill(tracer.proc.pid, SIGTERM);
257 }
258
259
260 int main(int argc, char **argv, char **envp)
261 {
262         int status, ch, policy = EPERM;
263         pid_t child;
264
265         /* When invoked via seccomp-trace symlink, work as seccomp
266          * violation logger rather than as syscall tracer */
267         if (strstr(argv[0], "seccomp-trace"))
268                 mode = SECCOMP_TRACE;
269
270         while ((ch = getopt(argc, argv, "f:p:")) != -1) {
271                 switch (ch) {
272                 case 'f':
273                         json = optarg;
274                         break;
275                 case 'p':
276                         policy = atoi(optarg);
277                         break;
278                 }
279         }
280
281         if (!json)
282                 json = getenv("SECCOMP_FILE");
283
284         argc -= optind;
285         argv += optind;
286
287         if (!argc)
288                 return -1;
289
290         if (getenv("TRACE_DEBUG"))
291                 debug = 1;
292         unsetenv("TRACE_DEBUG");
293
294         child = fork();
295
296         if (child == 0) {
297                 char **_argv = calloc(argc + 1, sizeof(char *));
298                 char **_envp;
299                 char *preload = NULL;
300                 const char *old_preload = getenv("LD_PRELOAD");
301                 int newenv = 0;
302                 int envc = 0;
303                 int ret;
304
305                 memcpy(_argv, argv, argc * sizeof(char *));
306
307                 while (envp[envc++])
308                         ;
309
310                 _envp = calloc(envc + 2, sizeof(char *));
311                 switch (mode) {
312                 case UTRACE:
313                         preload = "/lib/libpreload-trace.so";
314                         newenv = 1;
315                         break;
316                 case SECCOMP_TRACE:
317                         preload = "/lib/libpreload-seccomp.so";
318                         newenv = 2;
319                         asprintf(&_envp[1], "SECCOMP_FILE=%s", json ? json : "");
320                         kill(getpid(), SIGSTOP);
321                         break;
322                 }
323                 asprintf(&_envp[0], "LD_PRELOAD=%s%s%s", preload,
324                          old_preload ? ":" : "",
325                          old_preload ? old_preload : "");
326                 memcpy(&_envp[newenv], envp, envc * sizeof(char *));
327
328                 ret = execve(_argv[0], _argv, _envp);
329                 ULOG_ERR("failed to exec %s: %s\n", _argv[0], strerror(errno));
330
331                 free(_argv);
332                 free(_envp);
333                 return ret;
334         }
335
336         if (child < 0)
337                 return -1;
338
339         waitpid(child, &status, WUNTRACED);
340         if (!WIFSTOPPED(status)) {
341                 ULOG_ERR("failed to start %s\n", *argv);
342                 return -1;
343         }
344
345         /* Initialize uloop to catch all ptrace stops from now on. */
346         uloop_init();
347
348         int ptrace_options = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE;
349         switch (mode) {
350         case UTRACE:
351                 ptrace_options |= PTRACE_O_TRACESYSGOOD;
352                 ptrace_restart = PTRACE_SYSCALL;
353                 break;
354         case SECCOMP_TRACE:
355                 ptrace_options |= PTRACE_O_TRACESECCOMP;
356                 ptrace_restart = PTRACE_CONT;
357                 break;
358         }
359         if (ptrace(PTRACE_SEIZE, child, 0, ptrace_options) == -1) {
360                 ULOG_ERR("PTRACE_SEIZE: %s\n", strerror(errno));
361                 return -1;
362         }
363         if (ptrace(ptrace_restart, child, 0, SIGCONT) == -1) {
364                 ULOG_ERR("ptrace_restart: %s\n", strerror(errno));
365                 return -1;
366         }
367
368         tracer.proc.pid = child;
369         tracer.proc.cb = tracer_cb;
370         uloop_process_add(&tracer.proc);
371         signal(SIGTERM, sigterm_handler); /* Override uloop's SIGTERM handler */
372         uloop_run();
373         uloop_done();
374
375
376         switch (mode) {
377         case UTRACE:
378                 if (!json)
379                         if (asprintf(&json, "/tmp/%s.%u.json", basename(*argv), child) < 0)
380                                 ULOG_ERR("failed to allocate output path: %s\n", strerror(errno));
381                 break;
382         case SECCOMP_TRACE:
383                 if (!violation_count)
384                         return 0;
385                 asprintf(&json, "/tmp/%s.%u.violations.json", basename(*argv), child);
386                 break;
387         }
388         print_syscalls(policy, json);
389         return 0;
390 }