X-Git-Url: http://git.archive.openwrt.org/?p=project%2Fprocd.git;a=blobdiff_plain;f=instance.c;h=7895a04cc7b03cb74ff3749112bd75c5327e83b1;hp=d61bb332e2c7047b99f5d3d069576df63e224557;hb=eba428f6672068d819d6296db3f635e6ac5a8be7;hpb=85a5ce27f5f02e4ddab6916d69ba7f7375d289d4 diff --git a/instance.c b/instance.c index d61bb33..7895a04 100644 --- a/instance.c +++ b/instance.c @@ -32,6 +32,7 @@ enum { INSTANCE_ATTR_NETDEV, INSTANCE_ATTR_FILE, INSTANCE_ATTR_TRIGGER, + INSTANCE_ATTR_RESPAWN, INSTANCE_ATTR_NICE, __INSTANCE_ATTR_MAX }; @@ -43,6 +44,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = { [INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY }, [INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY }, [INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY }, + [INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY }, [INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 }, }; @@ -102,6 +104,8 @@ instance_start(struct service_instance *in) return; in->restart = false; + in->halt = !in->respawn; + if (!in->valid) return; @@ -117,6 +121,7 @@ instance_start(struct service_instance *in) DEBUG(1, "Started instance %s::%s\n", in->srv->name, in->name); in->proc.pid = pid; + clock_gettime(CLOCK_MONOTONIC, &in->start); uloop_process_add(&in->proc); } @@ -126,29 +131,62 @@ instance_timeout(struct uloop_timeout *t) struct service_instance *in; in = container_of(t, struct service_instance, timeout); - kill(in->proc.pid, SIGKILL); - uloop_process_delete(&in->proc); - in->proc.cb(&in->proc, -1); + + if (!in->halt && (in->restart || in->respawn)) + instance_start(in); } static void instance_exit(struct uloop_process *p, int ret) { struct service_instance *in; + struct timespec tp; + long runtime; in = container_of(p, struct service_instance, proc); - DEBUG(1, "Instance %s::%s exit with error code %d\n", in->srv->name, in->name, ret); + + clock_gettime(CLOCK_MONOTONIC, &tp); + runtime = tp.tv_sec - in->start.tv_sec; + + DEBUG(1, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime); uloop_timeout_cancel(&in->timeout); - if (in->restart) + if (in->halt) { + /* no action */ + } else if (in->restart) { instance_start(in); + } else if (in->respawn) { + if (runtime < in->respawn_threshold) + in->respawn_count++; + else + in->respawn_count = 0; + if (in->respawn_count > in->respawn_retry) { + LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n", + in->srv->name, in->name, in->respawn_count, runtime); + in->restart = in->respawn = 0; + in->halt = 1; + } else { + uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000); + } + } } void -instance_stop(struct service_instance *in, bool restart) +instance_stop(struct service_instance *in) { if (!in->proc.pending) return; + in->halt = true; + in->restart = in->respawn = false; + kill(in->proc.pid, SIGTERM); +} +static void +instance_restart(struct service_instance *in) +{ + if (!in->proc.pending) + return; + in->halt = false; + in->restart = true; kill(in->proc.pid, SIGTERM); } @@ -283,11 +321,31 @@ instance_config_parse(struct service_instance *in) return false; in->command = cur; - in->trigger = tb[INSTANCE_ATTR_TRIGGER]; - if (in->trigger) { + if (tb[INSTANCE_ATTR_RESPAWN]) { + int i = 0; + uint32_t vals[3] = { 3600, 5, 5}; + + blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) { + if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING)) + continue; + vals[i] = atoi(blobmsg_get_string(cur2)); + i++; + } + in->respawn = true; + in->respawn_count = 0; + in->respawn_threshold = vals[0]; + in->respawn_timeout = vals[1]; + in->respawn_retry = vals[2]; + } + if (tb[INSTANCE_ATTR_TRIGGER]) { + in->trigger = malloc(blob_pad_len(tb[INSTANCE_ATTR_TRIGGER])); + if (!in->trigger) + return -1; + memcpy(in->trigger, tb[INSTANCE_ATTR_TRIGGER], blob_pad_len(tb[INSTANCE_ATTR_TRIGGER])); trigger_add(in->trigger, in); } + if ((cur = tb[INSTANCE_ATTR_NICE])) { in->nice = (int8_t) blobmsg_get_u32(cur); if (in->nice < -20 || in->nice > 20) @@ -348,9 +406,9 @@ instance_update(struct service_instance *in, struct service_instance *in_new) instance_config_move(in, in_new); instance_start(in); } else { - in->restart = true; - instance_stop(in, true); + instance_restart(in); instance_config_move(in, in_new); + /* restart happens in the child callback handler */ } return true; } @@ -361,6 +419,7 @@ instance_free(struct service_instance *in) uloop_process_delete(&in->proc); uloop_timeout_cancel(&in->timeout); trigger_del(in); + free(in->trigger); instance_config_cleanup(in); free(in->config); free(in); @@ -386,13 +445,44 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr * void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose) { void *i; + struct pid_info pi; i = blobmsg_open_table(b, in->name); blobmsg_add_u8(b, "running", in->proc.pending); if (in->proc.pending) blobmsg_add_u32(b, "pid", in->proc.pid); blobmsg_add_blob(b, in->command); + + if (!avl_is_empty(&in->env.avl)) { + struct blobmsg_list_node *var; + void *e = blobmsg_open_table(b, "env"); + blobmsg_list_for_each(&in->env, var) + blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data)); + blobmsg_close_table(b, e); + } + + if (in->respawn) { + void *r = blobmsg_open_table(b, "respawn"); + blobmsg_add_u32(b, "timeout", in->respawn_timeout); + blobmsg_add_u32(b, "threshold", in->respawn_threshold); + blobmsg_add_u32(b, "retry", in->respawn_retry); + blobmsg_close_table(b, r); + } + if (verbose && in->trigger) blobmsg_add_blob(b, in->trigger); + if (!measure_process(in->proc.pid, &pi)) { + struct timespec tp; + long uptime; + + clock_gettime(CLOCK_MONOTONIC, &tp); + uptime = tp.tv_sec - in->start.tv_sec; + + blobmsg_add_u8(b, "ppid", pi.ppid); + blobmsg_add_u16(b, "uid", pi.uid); + blobmsg_add_u32(b, "fdcount", pi.fdcount); + blobmsg_add_u32(b, "vmsize", pi.vmsize); + blobmsg_add_u32(b, "uptime", uptime); + } blobmsg_close_table(b, i); }