syslog() is a blocking call on eglibc. As procd provides the actual syslog, we need...
[project/procd.git] / instance.c
index 41b9419..7895a04 100644 (file)
@@ -1,13 +1,39 @@
+/*
+ * Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
 #include <unistd.h>
+#include <stdint.h>
+#include <fcntl.h>
 
 #include "procd.h"
 #include "service.h"
 #include "instance.h"
+#include "md5.h"
 
 enum {
        INSTANCE_ATTR_COMMAND,
        INSTANCE_ATTR_ENV,
        INSTANCE_ATTR_DATA,
+       INSTANCE_ATTR_NETDEV,
+       INSTANCE_ATTR_FILE,
+       INSTANCE_ATTR_TRIGGER,
+       INSTANCE_ATTR_RESPAWN,
+       INSTANCE_ATTR_NICE,
        __INSTANCE_ATTR_MAX
 };
 
@@ -15,6 +41,21 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
        [INSTANCE_ATTR_COMMAND] = { "command", BLOBMSG_TYPE_ARRAY },
        [INSTANCE_ATTR_ENV] = { "env", BLOBMSG_TYPE_TABLE },
        [INSTANCE_ATTR_DATA] = { "data", BLOBMSG_TYPE_TABLE },
+       [INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY },
+       [INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY },
+       [INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY },
+       [INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY },
+       [INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 },
+};
+
+struct instance_netdev {
+       struct blobmsg_list_node node;
+       int ifindex;
+};
+
+struct instance_file {
+       struct blobmsg_list_node node;
+       uint32_t md5[4];
 };
 
 static void
@@ -24,13 +65,16 @@ instance_run(struct service_instance *in)
        struct blob_attr *cur;
        char **argv;
        int argc = 1; /* NULL terminated */
-       int rem;
+       int rem, fd;
+
+       if (in->nice)
+               setpriority(PRIO_PROCESS, 0, in->nice);
 
        blobmsg_for_each_attr(cur, in->command, rem)
                argc++;
 
        blobmsg_list_for_each(&in->env, var)
-               putenv(blobmsg_data(var->data));
+               setenv(blobmsg_name(var->data), blobmsg_data(var->data), 1);
 
        argv = alloca(sizeof(char *) * argc);
        argc = 0;
@@ -39,6 +83,14 @@ instance_run(struct service_instance *in)
                argv[argc++] = blobmsg_data(cur);
 
        argv[argc] = NULL;
+       fd = open("/dev/null", O_RDWR);
+       if (fd > -1) {
+               dup2(fd, STDIN_FILENO);
+               dup2(fd, STDOUT_FILENO);
+               dup2(fd, STDERR_FILENO);
+               if (fd > STDERR_FILENO)
+                       close(fd);
+       }
        execvp(argv[0], argv);
        exit(127);
 }
@@ -52,6 +104,8 @@ instance_start(struct service_instance *in)
                return;
 
        in->restart = false;
+       in->halt = !in->respawn;
+
        if (!in->valid)
                return;
 
@@ -60,11 +114,14 @@ instance_start(struct service_instance *in)
                return;
 
        if (!pid) {
+               uloop_done();
                instance_run(in);
                return;
        }
 
+       DEBUG(1, "Started instance %s::%s\n", in->srv->name, in->name);
        in->proc.pid = pid;
+       clock_gettime(CLOCK_MONOTONIC, &in->start);
        uloop_process_add(&in->proc);
 }
 
@@ -74,28 +131,62 @@ instance_timeout(struct uloop_timeout *t)
        struct service_instance *in;
 
        in = container_of(t, struct service_instance, timeout);
-       kill(in->proc.pid, SIGKILL);
-       uloop_process_delete(&in->proc);
-       in->proc.cb(&in->proc, -1);
+
+       if (!in->halt && (in->restart || in->respawn))
+               instance_start(in);
 }
 
 static void
 instance_exit(struct uloop_process *p, int ret)
 {
        struct service_instance *in;
+       struct timespec tp;
+       long runtime;
 
        in = container_of(p, struct service_instance, proc);
+
+       clock_gettime(CLOCK_MONOTONIC, &tp);
+       runtime = tp.tv_sec - in->start.tv_sec;
+
+       DEBUG(1, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
        uloop_timeout_cancel(&in->timeout);
-       if (in->restart)
+       if (in->halt) {
+               /* no action */
+       } else if (in->restart) {
                instance_start(in);
+       } else if (in->respawn) {
+               if (runtime < in->respawn_threshold)
+                       in->respawn_count++;
+               else
+                       in->respawn_count = 0;
+               if (in->respawn_count > in->respawn_retry) {
+                       LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n",
+                                                               in->srv->name, in->name, in->respawn_count, runtime);
+                       in->restart = in->respawn = 0;
+                       in->halt = 1;
+               } else {
+                       uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000);
+               }
+       }
 }
 
 void
-instance_stop(struct service_instance *in, bool restart)
+instance_stop(struct service_instance *in)
 {
        if (!in->proc.pending)
                return;
+       in->halt = true;
+       in->restart = in->respawn = false;
+       kill(in->proc.pid, SIGTERM);
+}
 
+static void
+instance_restart(struct service_instance *in)
+{
+       if (!in->proc.pending)
+               return;
+       in->halt = false;
+       in->restart = true;
        kill(in->proc.pid, SIGTERM);
 }
 
@@ -114,10 +205,97 @@ instance_config_changed(struct service_instance *in, struct service_instance *in
        if (!blobmsg_list_equal(&in->data, &in_new->data))
                return true;
 
+       if (!blobmsg_list_equal(&in->netdev, &in_new->netdev))
+               return true;
+
+       if (!blobmsg_list_equal(&in->file, &in_new->file))
+               return true;
+
+       if (in->nice != in_new->nice)
+               return true;
+
        return false;
 }
 
 static bool
+instance_netdev_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
+{
+       struct instance_netdev *n1 = container_of(l1, struct instance_netdev, node);
+       struct instance_netdev *n2 = container_of(l2, struct instance_netdev, node);
+
+       return n1->ifindex == n2->ifindex;
+}
+
+static void
+instance_netdev_update(struct blobmsg_list_node *l)
+{
+       struct instance_netdev *n = container_of(l, struct instance_netdev, node);
+
+       n->ifindex = if_nametoindex(n->node.avl.key);
+}
+
+static bool
+instance_file_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
+{
+       struct instance_file *f1 = container_of(l1, struct instance_file, node);
+       struct instance_file *f2 = container_of(l2, struct instance_file, node);
+
+       return !memcmp(f1->md5, f2->md5, sizeof(f1->md5));
+}
+
+static void
+instance_file_update(struct blobmsg_list_node *l)
+{
+       struct instance_file *f = container_of(l, struct instance_file, node);
+       md5_ctx_t md5;
+       char buf[256];
+       int len, fd;
+
+       memset(f->md5, 0, sizeof(f->md5));
+
+       fd = open(l->avl.key, O_RDONLY);
+       if (fd < 0)
+               return;
+
+       md5_begin(&md5);
+       do {
+               len = read(fd, buf, sizeof(buf));
+               if (len < 0) {
+                       if (errno == EINTR)
+                               continue;
+
+                       break;
+               }
+               if (!len)
+                       break;
+
+               md5_hash(buf, len, &md5);
+       } while(1);
+
+       md5_end(f->md5, &md5);
+       close(fd);
+}
+
+static bool
+instance_fill_array(struct blobmsg_list *l, struct blob_attr *cur, blobmsg_update_cb cb, bool array)
+{
+       struct blobmsg_list_node *node;
+
+       if (!cur)
+               return true;
+
+       if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
+               return false;
+
+       blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), array);
+       if (cb) {
+               blobmsg_list_for_each(l, node)
+                       cb(node);
+       }
+       return true;
+}
+
+static bool
 instance_config_parse(struct service_instance *in)
 {
        struct blob_attr *tb[__INSTANCE_ATTR_MAX];
@@ -144,18 +322,48 @@ instance_config_parse(struct service_instance *in)
 
        in->command = cur;
 
-       if ((cur = tb[INSTANCE_ATTR_ENV])) {
-               if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
-                       return false;
-               blobmsg_list_fill(&in->env, blobmsg_data(cur), blobmsg_data_len(cur));
+       if (tb[INSTANCE_ATTR_RESPAWN]) {
+               int i = 0;
+               uint32_t vals[3] = { 3600, 5, 5};
+
+               blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) {
+                       if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING))
+                               continue;
+                       vals[i] = atoi(blobmsg_get_string(cur2));
+                       i++;
+               }
+               in->respawn = true;
+               in->respawn_count = 0;
+               in->respawn_threshold = vals[0];
+               in->respawn_timeout = vals[1];
+               in->respawn_retry = vals[2];
+       }
+       if (tb[INSTANCE_ATTR_TRIGGER]) {
+               in->trigger = malloc(blob_pad_len(tb[INSTANCE_ATTR_TRIGGER]));
+               if (!in->trigger)
+                       return -1;
+               memcpy(in->trigger, tb[INSTANCE_ATTR_TRIGGER], blob_pad_len(tb[INSTANCE_ATTR_TRIGGER]));
+               trigger_add(in->trigger, in);
        }
 
-       if ((cur = tb[INSTANCE_ATTR_DATA])) {
-               if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
+       if ((cur = tb[INSTANCE_ATTR_NICE])) {
+               in->nice = (int8_t) blobmsg_get_u32(cur);
+               if (in->nice < -20 || in->nice > 20)
                        return false;
-               blobmsg_list_fill(&in->data, blobmsg_data(cur), blobmsg_data_len(cur));
        }
 
+       if (!instance_fill_array(&in->env, tb[INSTANCE_ATTR_ENV], NULL, false))
+               return false;
+
+       if (!instance_fill_array(&in->data, tb[INSTANCE_ATTR_DATA], NULL, false))
+               return false;
+
+       if (!instance_fill_array(&in->netdev, tb[INSTANCE_ATTR_NETDEV], instance_netdev_update, true))
+               return false;
+
+       if (!instance_fill_array(&in->file, tb[INSTANCE_ATTR_FILE], instance_file_update, true))
+               return false;
+
        return true;
 }
 
@@ -164,6 +372,7 @@ instance_config_cleanup(struct service_instance *in)
 {
        blobmsg_list_free(&in->env);
        blobmsg_list_free(&in->data);
+       blobmsg_list_free(&in->netdev);
 }
 
 static void
@@ -172,22 +381,35 @@ instance_config_move(struct service_instance *in, struct service_instance *in_sr
        instance_config_cleanup(in);
        blobmsg_list_move(&in->env, &in_src->env);
        blobmsg_list_move(&in->data, &in_src->data);
+       blobmsg_list_move(&in->netdev, &in_src->netdev);
+       in->trigger = in_src->trigger;
        in->command = in_src->command;
        in->name = in_src->name;
+       in->node.avl.key = in_src->node.avl.key;
+
+       free(in->config);
+       in->config = in_src->config;
+       in_src->config = NULL;
 }
 
 bool
 instance_update(struct service_instance *in, struct service_instance *in_new)
 {
        bool changed = instance_config_changed(in, in_new);
+       bool running = in->proc.pending;
 
-       in->config = in_new->config;
-       if (!changed)
+       if (!changed && running)
                return false;
 
-       in->restart = true;
-       instance_stop(in, true);
-       instance_config_move(in, in_new);
+       if (!running) {
+               if (changed)
+                       instance_config_move(in, in_new);
+               instance_start(in);
+       } else {
+               instance_restart(in);
+               instance_config_move(in, in_new);
+               /* restart happens in the child callback handler */
+       }
        return true;
 }
 
@@ -196,31 +418,71 @@ instance_free(struct service_instance *in)
 {
        uloop_process_delete(&in->proc);
        uloop_timeout_cancel(&in->timeout);
+       trigger_del(in);
+       free(in->trigger);
        instance_config_cleanup(in);
+       free(in->config);
        free(in);
 }
 
 void
-instance_init(struct service_instance *in, struct blob_attr *config)
+instance_init(struct service_instance *in, struct service *s, struct blob_attr *config)
 {
+       config = blob_memdup(config);
+       in->srv = s;
        in->name = blobmsg_name(config);
        in->config = config;
        in->timeout.cb = instance_timeout;
        in->proc.cb = instance_exit;
 
+       blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp);
+       blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp);
        blobmsg_list_simple_init(&in->env);
        blobmsg_list_simple_init(&in->data);
        in->valid = instance_config_parse(in);
 }
 
-void instance_dump(struct blob_buf *b, struct service_instance *in)
+void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
 {
        void *i;
+       struct pid_info pi;
 
        i = blobmsg_open_table(b, in->name);
        blobmsg_add_u8(b, "running", in->proc.pending);
        if (in->proc.pending)
                blobmsg_add_u32(b, "pid", in->proc.pid);
        blobmsg_add_blob(b, in->command);
+
+       if (!avl_is_empty(&in->env.avl)) {
+               struct blobmsg_list_node *var;
+               void *e = blobmsg_open_table(b, "env");
+               blobmsg_list_for_each(&in->env, var)
+                       blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
+               blobmsg_close_table(b, e);
+       }
+
+       if (in->respawn) {
+               void *r = blobmsg_open_table(b, "respawn");
+               blobmsg_add_u32(b, "timeout", in->respawn_timeout);
+               blobmsg_add_u32(b, "threshold", in->respawn_threshold);
+               blobmsg_add_u32(b, "retry", in->respawn_retry);
+               blobmsg_close_table(b, r);
+       }
+
+       if (verbose && in->trigger)
+               blobmsg_add_blob(b, in->trigger);
+       if (!measure_process(in->proc.pid, &pi)) {
+               struct timespec tp;
+               long uptime;
+
+               clock_gettime(CLOCK_MONOTONIC, &tp);
+               uptime = tp.tv_sec - in->start.tv_sec;
+
+               blobmsg_add_u8(b, "ppid", pi.ppid);
+               blobmsg_add_u16(b, "uid", pi.uid);
+               blobmsg_add_u32(b, "fdcount", pi.fdcount);
+               blobmsg_add_u32(b, "vmsize", pi.vmsize);
+               blobmsg_add_u32(b, "uptime", uptime);
+       }
        blobmsg_close_table(b, i);
 }