fix a bug in the trigger handling code
[project/procd.git] / instance.c
1 /*
2  * Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org>
3  * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License version 2.1
7  * as published by the Free Software Foundation
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  */
14
15 #include <sys/resource.h>
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <net/if.h>
19 #include <unistd.h>
20 #include <stdint.h>
21 #include <fcntl.h>
22
23 #include "procd.h"
24 #include "service.h"
25 #include "instance.h"
26 #include "md5.h"
27
/*
 * Slot indices for the attributes recognised in an instance's config blob.
 * They index both the instance_attr policy table and the tb[] array that
 * instance_config_parse() fills via blobmsg_parse().
 */
enum {
        INSTANCE_ATTR_COMMAND,
        INSTANCE_ATTR_ENV,
        INSTANCE_ATTR_DATA,
        INSTANCE_ATTR_NETDEV,
        INSTANCE_ATTR_FILE,
        INSTANCE_ATTR_TRIGGER,
        INSTANCE_ATTR_RESPAWN,
        INSTANCE_ATTR_NICE,
        __INSTANCE_ATTR_MAX     /* number of slots; not an attribute itself */
};
39
/*
 * blobmsg parsing policy: the key name and expected blobmsg type for each
 * INSTANCE_ATTR_* slot. Used by instance_config_parse().
 */
static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
        [INSTANCE_ATTR_COMMAND] = { "command", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_ENV] = { "env", BLOBMSG_TYPE_TABLE },
        [INSTANCE_ATTR_DATA] = { "data", BLOBMSG_TYPE_TABLE },
        [INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 },
};
50
/*
 * Entry of an instance's "netdev" list. The list key is used as the
 * interface name; ifindex caches what if_nametoindex() returned for it
 * (see instance_netdev_update()).
 */
struct instance_netdev {
        struct blobmsg_list_node node;
        int ifindex;
};
55
/*
 * Entry of an instance's "file" list. The list key is used as the path;
 * md5 holds the digest computed over the file's contents by
 * instance_file_update() (all zero when the file could not be opened).
 */
struct instance_file {
        struct blobmsg_list_node node;
        uint32_t md5[4];
};
60
61 static void
62 instance_run(struct service_instance *in)
63 {
64         struct blobmsg_list_node *var;
65         struct blob_attr *cur;
66         char **argv;
67         int argc = 1; /* NULL terminated */
68         int rem, fd;
69
70         if (in->nice)
71                 setpriority(PRIO_PROCESS, 0, in->nice);
72
73         blobmsg_for_each_attr(cur, in->command, rem)
74                 argc++;
75
76         blobmsg_list_for_each(&in->env, var)
77                 setenv(blobmsg_name(var->data), blobmsg_data(var->data), 1);
78
79         argv = alloca(sizeof(char *) * argc);
80         argc = 0;
81
82         blobmsg_for_each_attr(cur, in->command, rem)
83                 argv[argc++] = blobmsg_data(cur);
84
85         argv[argc] = NULL;
86         fd = open("/dev/null", O_RDWR);
87         if (fd > -1) {
88                 dup2(fd, STDIN_FILENO);
89                 dup2(fd, STDOUT_FILENO);
90                 dup2(fd, STDERR_FILENO);
91                 if (fd > STDERR_FILENO)
92                         close(fd);
93         }
94         execvp(argv[0], argv);
95         exit(127);
96 }
97
98 void
99 instance_start(struct service_instance *in)
100 {
101         int pid;
102
103         if (in->proc.pending)
104                 return;
105
106         in->restart = false;
107         in->halt = !in->respawn;
108
109         if (!in->valid)
110                 return;
111
112         pid = fork();
113         if (pid < 0)
114                 return;
115
116         if (!pid) {
117                 uloop_done();
118                 instance_run(in);
119                 return;
120         }
121
122         DEBUG(1, "Started instance %s::%s\n", in->srv->name, in->name);
123         in->proc.pid = pid;
124         clock_gettime(CLOCK_MONOTONIC, &in->start);
125         uloop_process_add(&in->proc);
126 }
127
128 static void
129 instance_timeout(struct uloop_timeout *t)
130 {
131         struct service_instance *in;
132
133         in = container_of(t, struct service_instance, timeout);
134
135         if (!in->halt && (in->restart || in->respawn))
136                 instance_start(in);
137 }
138
139 static void
140 instance_exit(struct uloop_process *p, int ret)
141 {
142         struct service_instance *in;
143         struct timespec tp;
144         long runtime;
145
146         in = container_of(p, struct service_instance, proc);
147
148         clock_gettime(CLOCK_MONOTONIC, &tp);
149         runtime = tp.tv_sec - in->start.tv_sec;
150
151         DEBUG(1, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
152         uloop_timeout_cancel(&in->timeout);
153         if (in->halt) {
154                 /* no action */
155         } else if (in->restart) {
156                 instance_start(in);
157         } else if (in->respawn) {
158                 if (runtime < in->respawn_threshold)
159                         in->respawn_count++;
160                 else
161                         in->respawn_count = 0;
162                 if (in->respawn_count > in->respawn_retry) {
163                         LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n",
164                                                                 in->srv->name, in->name, in->respawn_count, runtime);
165                         in->restart = in->respawn = 0;
166                         in->halt = 1;
167                 } else {
168                         uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000);
169                 }
170         }
171 }
172
173 void
174 instance_stop(struct service_instance *in)
175 {
176         if (!in->proc.pending)
177                 return;
178         in->halt = true;
179         in->restart = in->respawn = false;
180         kill(in->proc.pid, SIGTERM);
181 }
182
183 static void
184 instance_restart(struct service_instance *in)
185 {
186         if (!in->proc.pending)
187                 return;
188         in->halt = false;
189         in->restart = true;
190         kill(in->proc.pid, SIGTERM);
191 }
192
193 static bool
194 instance_config_changed(struct service_instance *in, struct service_instance *in_new)
195 {
196         if (!in->valid)
197                 return true;
198
199         if (!blob_attr_equal(in->command, in_new->command))
200                 return true;
201
202         if (!blobmsg_list_equal(&in->env, &in_new->env))
203                 return true;
204
205         if (!blobmsg_list_equal(&in->data, &in_new->data))
206                 return true;
207
208         if (!blobmsg_list_equal(&in->netdev, &in_new->netdev))
209                 return true;
210
211         if (!blobmsg_list_equal(&in->file, &in_new->file))
212                 return true;
213
214         if (in->nice != in_new->nice)
215                 return true;
216
217         return false;
218 }
219
220 static bool
221 instance_netdev_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
222 {
223         struct instance_netdev *n1 = container_of(l1, struct instance_netdev, node);
224         struct instance_netdev *n2 = container_of(l2, struct instance_netdev, node);
225
226         return n1->ifindex == n2->ifindex;
227 }
228
229 static void
230 instance_netdev_update(struct blobmsg_list_node *l)
231 {
232         struct instance_netdev *n = container_of(l, struct instance_netdev, node);
233
234         n->ifindex = if_nametoindex(n->node.avl.key);
235 }
236
237 static bool
238 instance_file_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
239 {
240         struct instance_file *f1 = container_of(l1, struct instance_file, node);
241         struct instance_file *f2 = container_of(l2, struct instance_file, node);
242
243         return !memcmp(f1->md5, f2->md5, sizeof(f1->md5));
244 }
245
246 static void
247 instance_file_update(struct blobmsg_list_node *l)
248 {
249         struct instance_file *f = container_of(l, struct instance_file, node);
250         md5_ctx_t md5;
251         char buf[256];
252         int len, fd;
253
254         memset(f->md5, 0, sizeof(f->md5));
255
256         fd = open(l->avl.key, O_RDONLY);
257         if (fd < 0)
258                 return;
259
260         md5_begin(&md5);
261         do {
262                 len = read(fd, buf, sizeof(buf));
263                 if (len < 0) {
264                         if (errno == EINTR)
265                                 continue;
266
267                         break;
268                 }
269                 if (!len)
270                         break;
271
272                 md5_hash(buf, len, &md5);
273         } while(1);
274
275         md5_end(f->md5, &md5);
276         close(fd);
277 }
278
279 static bool
280 instance_fill_array(struct blobmsg_list *l, struct blob_attr *cur, blobmsg_update_cb cb, bool array)
281 {
282         struct blobmsg_list_node *node;
283
284         if (!cur)
285                 return true;
286
287         if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
288                 return false;
289
290         blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), array);
291         if (cb) {
292                 blobmsg_list_for_each(l, node)
293                         cb(node);
294         }
295         return true;
296 }
297
298 static bool
299 instance_config_parse(struct service_instance *in)
300 {
301         struct blob_attr *tb[__INSTANCE_ATTR_MAX];
302         struct blob_attr *cur, *cur2;
303         int argc = 0;
304         int rem;
305
306         blobmsg_parse(instance_attr, __INSTANCE_ATTR_MAX, tb,
307                 blobmsg_data(in->config), blobmsg_data_len(in->config));
308
309         cur = tb[INSTANCE_ATTR_COMMAND];
310         if (!cur)
311                 return false;
312
313         if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
314                 return false;
315
316         blobmsg_for_each_attr(cur2, cur, rem) {
317                 argc++;
318                 break;
319         }
320         if (!argc)
321                 return false;
322
323         in->command = cur;
324
325         if (tb[INSTANCE_ATTR_RESPAWN]) {
326                 int i = 0;
327                 uint32_t vals[3] = { 3600, 5, 5};
328
329                 blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) {
330                         if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING))
331                                 continue;
332                         vals[i] = atoi(blobmsg_get_string(cur2));
333                         i++;
334                 }
335                 in->respawn = true;
336                 in->respawn_count = 0;
337                 in->respawn_timeout = vals[0];
338                 in->respawn_threshold = vals[1];
339                 in->respawn_retry = vals[2];
340         }
341         if (tb[INSTANCE_ATTR_TRIGGER]) {
342                 in->trigger = malloc(blob_pad_len(tb[INSTANCE_ATTR_TRIGGER]));
343                 if (!in->trigger)
344                         return -1;
345                 memcpy(in->trigger, tb[INSTANCE_ATTR_TRIGGER], blob_pad_len(tb[INSTANCE_ATTR_TRIGGER]));
346                 trigger_add(in->trigger, in);
347         }
348
349         if ((cur = tb[INSTANCE_ATTR_NICE])) {
350                 in->nice = (int8_t) blobmsg_get_u32(cur);
351                 if (in->nice < -20 || in->nice > 20)
352                         return false;
353         }
354
355         if (!instance_fill_array(&in->env, tb[INSTANCE_ATTR_ENV], NULL, false))
356                 return false;
357
358         if (!instance_fill_array(&in->data, tb[INSTANCE_ATTR_DATA], NULL, false))
359                 return false;
360
361         if (!instance_fill_array(&in->netdev, tb[INSTANCE_ATTR_NETDEV], instance_netdev_update, true))
362                 return false;
363
364         if (!instance_fill_array(&in->file, tb[INSTANCE_ATTR_FILE], instance_file_update, true))
365                 return false;
366
367         return true;
368 }
369
370 static void
371 instance_config_cleanup(struct service_instance *in)
372 {
373         blobmsg_list_free(&in->env);
374         blobmsg_list_free(&in->data);
375         blobmsg_list_free(&in->netdev);
376 }
377
378 static void
379 instance_config_move(struct service_instance *in, struct service_instance *in_src)
380 {
381         instance_config_cleanup(in);
382         blobmsg_list_move(&in->env, &in_src->env);
383         blobmsg_list_move(&in->data, &in_src->data);
384         blobmsg_list_move(&in->netdev, &in_src->netdev);
385         in->trigger = in_src->trigger;
386         in->command = in_src->command;
387         in->name = in_src->name;
388         in->node.avl.key = in_src->node.avl.key;
389
390         free(in->config);
391         in->config = in_src->config;
392         in_src->config = NULL;
393 }
394
395 bool
396 instance_update(struct service_instance *in, struct service_instance *in_new)
397 {
398         bool changed = instance_config_changed(in, in_new);
399         bool running = in->proc.pending;
400
401         if (!changed && running)
402                 return false;
403
404         if (!running) {
405                 if (changed)
406                         instance_config_move(in, in_new);
407                 instance_start(in);
408         } else {
409                 instance_restart(in);
410                 instance_config_move(in, in_new);
411                 /* restart happens in the child callback handler */
412         }
413         return true;
414 }
415
/*
 * Destroy an instance: unhook it from uloop (process watch and timeout),
 * remove its trigger registrations, then release the trigger copy, the
 * parsed lists, the config blob and finally the instance itself.
 */
void
instance_free(struct service_instance *in)
{
        uloop_process_delete(&in->proc);
        uloop_timeout_cancel(&in->timeout);
        /* unregister before the trigger buffer is freed */
        trigger_del(in);
        free(in->trigger);
        instance_config_cleanup(in);
        free(in->config);
        free(in);
}
427
428 void
429 instance_init(struct service_instance *in, struct service *s, struct blob_attr *config)
430 {
431         config = blob_memdup(config);
432         in->srv = s;
433         in->name = blobmsg_name(config);
434         in->config = config;
435         in->timeout.cb = instance_timeout;
436         in->proc.cb = instance_exit;
437
438         blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp);
439         blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp);
440         blobmsg_list_simple_init(&in->env);
441         blobmsg_list_simple_init(&in->data);
442         in->valid = instance_config_parse(in);
443 }
444
445 void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
446 {
447         void *i;
448         struct pid_info pi;
449
450         i = blobmsg_open_table(b, in->name);
451         blobmsg_add_u8(b, "running", in->proc.pending);
452         if (in->proc.pending)
453                 blobmsg_add_u32(b, "pid", in->proc.pid);
454         blobmsg_add_blob(b, in->command);
455
456         if (!avl_is_empty(&in->env.avl)) {
457                 struct blobmsg_list_node *var;
458                 void *e = blobmsg_open_table(b, "env");
459                 blobmsg_list_for_each(&in->env, var)
460                         blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
461                 blobmsg_close_table(b, e);
462         }
463
464         if (in->respawn) {
465                 void *r = blobmsg_open_table(b, "respawn");
466                 blobmsg_add_u32(b, "timeout", in->respawn_timeout);
467                 blobmsg_add_u32(b, "threshold", in->respawn_threshold);
468                 blobmsg_add_u32(b, "retry", in->respawn_retry);
469                 blobmsg_close_table(b, r);
470         }
471
472         if (verbose && in->trigger)
473                 blobmsg_add_blob(b, in->trigger);
474         if (!measure_process(in->proc.pid, &pi)) {
475                 struct timespec tp;
476                 long uptime;
477
478                 clock_gettime(CLOCK_MONOTONIC, &tp);
479                 uptime = tp.tv_sec - in->start.tv_sec;
480
481                 blobmsg_add_u8(b, "ppid", pi.ppid);
482                 blobmsg_add_u16(b, "uid", pi.uid);
483                 blobmsg_add_u32(b, "fdcount", pi.fdcount);
484                 blobmsg_add_u32(b, "vmsize", pi.vmsize);
485                 blobmsg_add_u32(b, "uptime", uptime);
486         }
487         blobmsg_close_table(b, i);
488 }