X-Git-Url: https://git.archive.openwrt.org/?p=project%2Fprocd.git;a=blobdiff_plain;f=instance.c;h=7895a04cc7b03cb74ff3749112bd75c5327e83b1;hp=b2cdfa1dd57397d4e36d7886d8f5bafa5e710f23;hb=8fd40e8352bac9992ca4ebd50998d4e961f5404b;hpb=8158e9052afbf2ee47de645167b90b6e7fc7d86d diff --git a/instance.c b/instance.c index b2cdfa1..7895a04 100644 --- a/instance.c +++ b/instance.c @@ -31,6 +31,8 @@ enum { INSTANCE_ATTR_DATA, INSTANCE_ATTR_NETDEV, INSTANCE_ATTR_FILE, + INSTANCE_ATTR_TRIGGER, + INSTANCE_ATTR_RESPAWN, INSTANCE_ATTR_NICE, __INSTANCE_ATTR_MAX }; @@ -41,6 +43,8 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = { [INSTANCE_ATTR_DATA] = { "data", BLOBMSG_TYPE_TABLE }, [INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY }, [INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY }, + [INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY }, + [INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY }, [INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 }, }; @@ -61,7 +65,7 @@ instance_run(struct service_instance *in) struct blob_attr *cur; char **argv; int argc = 1; /* NULL terminated */ - int rem; + int rem, fd; if (in->nice) setpriority(PRIO_PROCESS, 0, in->nice); @@ -79,6 +83,14 @@ instance_run(struct service_instance *in) argv[argc++] = blobmsg_data(cur); argv[argc] = NULL; + fd = open("/dev/null", O_RDWR); + if (fd > -1) { + dup2(fd, STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + if (fd > STDERR_FILENO) + close(fd); + } execvp(argv[0], argv); exit(127); } @@ -92,6 +104,8 @@ instance_start(struct service_instance *in) return; in->restart = false; + in->halt = !in->respawn; + if (!in->valid) return; @@ -100,12 +114,14 @@ instance_start(struct service_instance *in) return; if (!pid) { + uloop_done(); instance_run(in); return; } DEBUG(1, "Started instance %s::%s\n", in->srv->name, in->name); in->proc.pid = pid; + clock_gettime(CLOCK_MONOTONIC, &in->start); uloop_process_add(&in->proc); } @@ -115,29 +131,62 @@ instance_timeout(struct uloop_timeout *t) struct service_instance *in; in = container_of(t, struct service_instance, timeout); - kill(in->proc.pid, SIGKILL); - uloop_process_delete(&in->proc); - in->proc.cb(&in->proc, -1); + + if (!in->halt && (in->restart || in->respawn)) + instance_start(in); } static void instance_exit(struct uloop_process *p, int ret) { struct service_instance *in; + struct timespec tp; + long runtime; in = container_of(p, struct service_instance, proc); - LOG("Instance %s::%s exit with error code %d\n", in->srv->name, in->name, ret); + + clock_gettime(CLOCK_MONOTONIC, &tp); + runtime = tp.tv_sec - in->start.tv_sec; + + DEBUG(1, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime); uloop_timeout_cancel(&in->timeout); - if (in->restart) + if (in->halt) { + /* no action */ + } else if (in->restart) { instance_start(in); + } else if (in->respawn) { + if (runtime < in->respawn_threshold) + in->respawn_count++; + else + in->respawn_count = 0; + if (in->respawn_count > in->respawn_retry) { + LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n", + in->srv->name, in->name, in->respawn_count, runtime); + in->restart = in->respawn = 0; + in->halt = 1; + } else { + uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000); + } + } } void -instance_stop(struct service_instance *in, bool restart) +instance_stop(struct service_instance *in) { if (!in->proc.pending) return; + in->halt = true; + in->restart = in->respawn = false; + kill(in->proc.pid, SIGTERM); +} +static void +instance_restart(struct service_instance *in) +{ + if (!in->proc.pending) + return; + in->halt = false; + in->restart = true; kill(in->proc.pid, SIGTERM); } @@ -273,6 +322,30 @@ instance_config_parse(struct service_instance *in) in->command = cur; + if (tb[INSTANCE_ATTR_RESPAWN]) { + int i = 0; + uint32_t vals[3] = { 3600, 5, 5}; + + blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) { + if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING)) + continue; + vals[i] = atoi(blobmsg_get_string(cur2)); + i++; + } + in->respawn = true; + in->respawn_count = 0; + in->respawn_threshold = vals[0]; + in->respawn_timeout = vals[1]; + in->respawn_retry = vals[2]; + } + if (tb[INSTANCE_ATTR_TRIGGER]) { + in->trigger = malloc(blob_pad_len(tb[INSTANCE_ATTR_TRIGGER])); + if (!in->trigger) + return -1; + memcpy(in->trigger, tb[INSTANCE_ATTR_TRIGGER], blob_pad_len(tb[INSTANCE_ATTR_TRIGGER])); + trigger_add(in->trigger, in); + } + if ((cur = tb[INSTANCE_ATTR_NICE])) { in->nice = (int8_t) blobmsg_get_u32(cur); if (in->nice < -20 || in->nice > 20) @@ -309,6 +382,7 @@ instance_config_move(struct service_instance *in, struct service_instance *in_sr blobmsg_list_move(&in->env, &in_src->env); blobmsg_list_move(&in->data, &in_src->data); blobmsg_list_move(&in->netdev, &in_src->netdev); + in->trigger = in_src->trigger; in->command = in_src->command; in->name = in_src->name; in->node.avl.key = in_src->node.avl.key; @@ -322,13 +396,20 @@ bool instance_update(struct service_instance *in, struct service_instance *in_new) { bool changed = instance_config_changed(in, in_new); + bool running = in->proc.pending; - if (!changed) + if (!changed && running) return false; - in->restart = true; - instance_stop(in, true); - instance_config_move(in, in_new); + if (!running) { + if (changed) + instance_config_move(in, in_new); + instance_start(in); + } else { + instance_restart(in); + instance_config_move(in, in_new); + /* restart happens in the child callback handler */ + } return true; } @@ -337,6 +418,8 @@ instance_free(struct service_instance *in) { uloop_process_delete(&in->proc); uloop_timeout_cancel(&in->timeout); + trigger_del(in); + free(in->trigger); instance_config_cleanup(in); free(in->config); free(in); @@ -359,14 +442,47 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr * in->valid = instance_config_parse(in); } -void instance_dump(struct blob_buf *b, struct service_instance *in) +void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose) { void *i; + struct pid_info pi; i = blobmsg_open_table(b, in->name); blobmsg_add_u8(b, "running", in->proc.pending); if (in->proc.pending) blobmsg_add_u32(b, "pid", in->proc.pid); blobmsg_add_blob(b, in->command); + + if (!avl_is_empty(&in->env.avl)) { + struct blobmsg_list_node *var; + void *e = blobmsg_open_table(b, "env"); + blobmsg_list_for_each(&in->env, var) + blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data)); + blobmsg_close_table(b, e); + } + + if (in->respawn) { + void *r = blobmsg_open_table(b, "respawn"); + blobmsg_add_u32(b, "timeout", in->respawn_timeout); + blobmsg_add_u32(b, "threshold", in->respawn_threshold); + blobmsg_add_u32(b, "retry", in->respawn_retry); + blobmsg_close_table(b, r); + } + + if (verbose && in->trigger) + blobmsg_add_blob(b, in->trigger); + if (!measure_process(in->proc.pid, &pi)) { + struct timespec tp; + long uptime; + + clock_gettime(CLOCK_MONOTONIC, &tp); + uptime = tp.tv_sec - in->start.tv_sec; + + blobmsg_add_u8(b, "ppid", pi.ppid); + blobmsg_add_u16(b, "uid", pi.uid); + blobmsg_add_u32(b, "fdcount", pi.fdcount); + blobmsg_add_u32(b, "vmsize", pi.vmsize); + blobmsg_add_u32(b, "uptime", uptime); + } blobmsg_close_table(b, i); }