interface: report link up events for force_link interfaces
[project/netifd.git] / interface-ip.c
index 183935e..26a2865 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * netifd - network interface daemon
  * Copyright (C) 2012 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2012 Steven Barth <steven@midlink.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
-#include <unistd.h>
 
+#include <limits.h>
 #include <arpa/inet.h>
+#include <netinet/in.h>
 
 #include "netifd.h"
 #include "device.h"
@@ -33,6 +35,11 @@ enum {
        ROUTE_GATEWAY,
        ROUTE_METRIC,
        ROUTE_MTU,
+       ROUTE_VALID,
+       ROUTE_TABLE,
+       ROUTE_SOURCE,
+       ROUTE_ONLINK,
+       ROUTE_TYPE,
        __ROUTE_MAX
 };
 
@@ -43,9 +50,14 @@ static const struct blobmsg_policy route_attr[__ROUTE_MAX] = {
        [ROUTE_GATEWAY] = { .name = "gateway", .type = BLOBMSG_TYPE_STRING },
        [ROUTE_METRIC] = { .name = "metric", .type = BLOBMSG_TYPE_INT32 },
        [ROUTE_MTU] = { .name = "mtu", .type = BLOBMSG_TYPE_INT32 },
+       [ROUTE_TABLE] = { .name = "table", .type = BLOBMSG_TYPE_STRING },
+       [ROUTE_VALID] = { .name = "valid", .type = BLOBMSG_TYPE_INT32 },
+       [ROUTE_SOURCE] = { .name = "source", .type = BLOBMSG_TYPE_STRING },
+       [ROUTE_ONLINK] = { .name = "onlink", .type = BLOBMSG_TYPE_BOOL },
+       [ROUTE_TYPE] = { .name = "type", .type = BLOBMSG_TYPE_STRING }
 };
 
-const struct config_param_list route_attr_list = {
+/* Exported parameter list for routes: bundles the route_attr policy
+ * table with its number of entries (__ROUTE_MAX). */
+const struct uci_blob_param_list route_attr_list = {
        .n_params = __ROUTE_MAX,
        .params = route_attr,
 };
@@ -53,6 +65,7 @@ const struct config_param_list route_attr_list = {
 
 struct list_head prefixes = LIST_HEAD_INIT(prefixes);
 static struct device_prefix *ula_prefix = NULL;
+static struct uloop_timeout valid_until_timeout;
 
 
 static void
@@ -62,8 +75,8 @@ clear_if_addr(union if_addr *a, int mask)
        uint8_t m_clear = (1 << (m_bytes * 8 - mask)) - 1;
        uint8_t *p = (uint8_t *) a;
 
-       if (m_bytes < sizeof(a))
-               memset(p + m_bytes, 0, sizeof(a) - m_bytes);
+       if (m_bytes < sizeof(*a))
+               memset(p + m_bytes, 0, sizeof(*a) - m_bytes);
 
        p[m_bytes - 1] &= ~m_clear;
 }
@@ -84,6 +97,65 @@ match_if_addr(union if_addr *a1, union if_addr *a2, int mask)
        return !memcmp(p1, p2, sizeof(*p1));
 }
 
+/*
+ * Add or remove one policy routing rule.
+ *
+ * add:       true installs the rule through system_add_iprule(), false
+ *            removes it through system_del_iprule()
+ * v6:        selects IPRULE_INET6 instead of IPRULE_INET4
+ * priority:  rule priority (IPRULE_PRIORITY is always set)
+ * addr/mask: optional address to match; skipped when addr is NULL
+ * table:     when non-zero, the rule looks up this routing table
+ * in_iface:  optional; when it has a layer 3 device the rule is limited to
+ *            that device as input device
+ * action:    only consulted when table is 0; resolved through
+ *            system_resolve_iprule_action()
+ * src:       true matches addr as source, false as destination
+ *
+ * Returns the result of system_add_iprule()/system_del_iprule(), or 0 if
+ * the stored lookup table id turns out to be zero.
+ */
+static int set_ip_source_policy(bool add, bool v6, unsigned int priority,
+               const union if_addr *addr, uint8_t mask, unsigned int table,
+               struct interface *in_iface, const char *action, bool src)
+{
+       struct iprule rule = {
+               .flags = IPRULE_PRIORITY,
+               .priority = priority
+       };
+
+       if (addr) {
+               if (src) {
+                       rule.flags |= IPRULE_SRC;
+                       rule.src_addr = *addr;
+                       rule.src_mask = mask;
+               } else {
+                       rule.flags |= IPRULE_DEST;
+                       rule.dest_addr = *addr;
+                       rule.dest_mask = mask;
+               }
+       }
+
+       if (table) {
+               rule.flags |= IPRULE_LOOKUP;
+               rule.lookup = table;
+
+               if (!rule.lookup)
+                       return 0;
+       } else if (action) {
+               rule.flags |= IPRULE_ACTION;
+               /* NOTE(review): the result of the resolver is not checked */
+               system_resolve_iprule_action(action, &rule.action);
+       }
+
+       if (in_iface && in_iface->l3_dev.dev) {
+               rule.flags |= IPRULE_IN;
+               /* bounded copy: never write past the end of in_dev */
+               snprintf(rule.in_dev, sizeof(rule.in_dev), "%s",
+                        in_iface->l3_dev.dev->ifname);
+       }
+
+       rule.flags |= (v6) ? IPRULE_INET6 : IPRULE_INET4;
+
+       return (add) ? system_add_iprule(&rule) : system_del_iprule(&rule);
+}
+
+/*
+ * Add or remove the policy rule that sends packets entering through "lo"
+ * to the interface's IPv6 (v6) or IPv4 routing table. The rule priority is
+ * IPRULE_PRIORITY_NW plus the ifindex of the interface's layer 3 device.
+ * Returns 0 without touching the system when the interface has no such
+ * table, otherwise the result of system_add_iprule()/system_del_iprule().
+ */
+static int set_ip_lo_policy(bool add, bool v6, struct interface *iface)
+{
+       struct iprule lo_rule = {
+               .in_dev = "lo",
+               .priority = IPRULE_PRIORITY_NW + iface->l3_dev.dev->ifindex,
+               .flags = IPRULE_IN | IPRULE_LOOKUP | IPRULE_PRIORITY,
+       };
+
+       if (v6) {
+               lo_rule.lookup = iface->ip6table;
+               lo_rule.flags |= IPRULE_INET6;
+       } else {
+               lo_rule.lookup = iface->ip4table;
+               lo_rule.flags |= IPRULE_INET4;
+       }
+
+       /* no table configured for this family: nothing to do */
+       if (!lo_rule.lookup)
+               return 0;
+
+       if (add)
+               return system_add_iprule(&lo_rule);
+
+       return system_del_iprule(&lo_rule);
+}
+
 static bool
 __find_ip_addr_target(struct interface_ip_settings *ip, union if_addr *a, bool v6)
 {
@@ -96,7 +168,13 @@ __find_ip_addr_target(struct interface_ip_settings *ip, union if_addr *a, bool v
                if (v6 != ((addr->flags & DEVADDR_FAMILY) == DEVADDR_INET6))
                        continue;
 
-               if (!match_if_addr(&addr->addr, a, addr->mask))
+               // Handle offlink addresses correctly
+               unsigned int mask = addr->mask;
+               if ((addr->flags & DEVADDR_FAMILY) == DEVADDR_INET6 &&
+                               (addr->flags & DEVADDR_OFFLINK))
+                       mask = 128;
+
+               if (!match_if_addr(&addr->addr, a, mask))
                        continue;
 
                return true;
@@ -121,6 +199,9 @@ __find_ip_route_target(struct interface_ip_settings *ip, union if_addr *a,
                if (!match_if_addr(&route->addr, a, route->mask))
                        continue;
 
+               if (route->flags & DEVROUTE_TABLE)
+                       continue;
+
                if (!*res || route->mask < (*res)->mask)
                        *res = route;
        }
@@ -142,9 +223,8 @@ interface_ip_find_route_target(struct interface *iface, union if_addr *a,
 }
 
 struct interface *
-interface_ip_add_target_route(union if_addr *addr, bool v6)
+interface_ip_add_target_route(union if_addr *addr, bool v6, struct interface *iface)
 {
-       struct interface *iface;
        struct device_route *route, *r_next = NULL;
        bool defaultroute_target = false;
        int addrsize = v6 ? sizeof(addr->in6) : sizeof(addr->in);
@@ -160,7 +240,7 @@ interface_ip_add_target_route(union if_addr *addr, bool v6)
        else
                memcpy(&route->addr, addr, addrsize);
 
-       vlist_for_each_element(&interfaces, iface, node) {
+       if (iface) {
                /* look for locally addressable target first */
                if (interface_ip_find_addr_target(iface, addr, v6))
                        goto done;
@@ -168,6 +248,16 @@ interface_ip_add_target_route(union if_addr *addr, bool v6)
                /* do not stop at the first route, let the lookup compare
                 * masks to find the best match */
                interface_ip_find_route_target(iface, addr, v6, &r_next);
+       } else {
+               vlist_for_each_element(&interfaces, iface, node) {
+                       /* look for locally addressable target first */
+                       if (interface_ip_find_addr_target(iface, addr, v6))
+                               goto done;
+
+                       /* do not stop at the first route, let the lookup compare
+                        * masks to find the best match */
+                       interface_ip_find_route_target(iface, addr, v6, &r_next);
+               }
        }
 
        if (!r_next) {
@@ -179,16 +269,35 @@ interface_ip_add_target_route(union if_addr *addr, bool v6)
        memcpy(&route->nexthop, &r_next->nexthop, sizeof(route->nexthop));
        route->mtu = r_next->mtu;
        route->metric = r_next->metric;
+       route->table = r_next->table;
 
 done:
        route->iface = iface;
        if (defaultroute_target)
                free(route);
        else
-               vlist_add(&iface->host_routes, &route->node, &route->flags);
+               vlist_add(&iface->host_routes, &route->node, route);
        return iface;
 }
 
+/*
+ * Fill in route defaults from the owning interface: the metric when the
+ * route has no DEVROUTE_METRIC flag, and the per-family routing table when
+ * it has no DEVROUTE_TABLE flag. A non-zero inherited table additionally
+ * sets DEVROUTE_SRCTABLE. Does nothing when iface is NULL.
+ */
+static void
+interface_set_route_info(struct interface *iface, struct device_route *route)
+{
+       const bool is_v6 = (route->flags & DEVADDR_FAMILY) == DEVADDR_INET6;
+
+       if (!iface)
+               return;
+
+       /* no explicit metric on the route: take the interface metric */
+       if (!(route->flags & DEVROUTE_METRIC))
+               route->metric = iface->metric;
+
+       /* an explicitly set table is left alone */
+       if (route->flags & DEVROUTE_TABLE)
+               return;
+
+       if (is_v6)
+               route->table = iface->ip6table;
+       else
+               route->table = iface->ip4table;
+
+       if (route->table)
+               route->flags |= DEVROUTE_SRCTABLE;
+}
+
 void
 interface_ip_add_route(struct interface *iface, struct blob_attr *attr, bool v6)
 {
@@ -248,7 +357,56 @@ interface_ip_add_route(struct interface *iface, struct blob_attr *attr, bool v6)
                route->flags |= DEVROUTE_MTU;
        }
 
-       vlist_add(&ip->route, &route->node, &route->flags);
+       // Use source-based routing
+       if ((cur = tb[ROUTE_SOURCE]) != NULL) {
+               char *saveptr, *source = alloca(blobmsg_data_len(cur));
+               memcpy(source, blobmsg_data(cur), blobmsg_data_len(cur));
+
+               const char *addr = strtok_r(source, "/", &saveptr);
+               const char *mask = strtok_r(NULL, "/", &saveptr);
+
+               if (!addr || inet_pton(af, addr, &route->source) < 1) {
+                       DPRINTF("Failed to parse route source: %s\n", addr ? addr : "NULL");
+                       goto error;
+               }
+
+               route->sourcemask = (mask) ? atoi(mask) : ((af == AF_INET6) ? 128 : 32);
+       }
+
+       if ((cur = tb[ROUTE_ONLINK]) != NULL && blobmsg_get_bool(cur))
+               route->flags |= DEVROUTE_ONLINK;
+
+       if ((cur = tb[ROUTE_TABLE]) != NULL) {
+               if (!system_resolve_rt_table(blobmsg_data(cur), &route->table)) {
+                       DPRINTF("Failed to resolve routing table: %s\n", (char *) blobmsg_data(cur));
+                       goto error;
+               }
+
+               /* only set the table flag if not using the main (default) table */
+               if (system_is_default_rt_table(route->table))
+                       route->table = 0;
+
+               if (route->table)
+                       route->flags |= DEVROUTE_TABLE;
+       }
+
+       if ((cur = tb[ROUTE_VALID]) != NULL) {
+               int64_t valid = blobmsg_get_u32(cur);
+               int64_t valid_until = valid + (int64_t)system_get_rtime();
+               if (valid_until <= LONG_MAX && valid != 0xffffffffLL) // Catch overflow
+                       route->valid_until = valid_until;
+       }
+
+       if ((cur = tb[ROUTE_TYPE]) != NULL) {
+               if (!system_resolve_rt_type(blobmsg_data(cur), &route->type)) {
+                       DPRINTF("Failed to resolve routing type: %s\n", (char *) blobmsg_data(cur));
+                       goto error;
+               }
+               route->flags |= DEVROUTE_TYPE;
+       }
+
+       interface_set_route_info(iface, route);
+       vlist_add(&ip->route, &route->node, route);
        return;
 
 error:
@@ -265,14 +423,34 @@ addr_cmp(const void *k1, const void *k2, void *ptr)
+/*
+ * Three-way comparison of two struct device_route keys (k1, k2); ptr is
+ * unused. Order: larger mask first, then smaller metric, larger flags,
+ * smaller sourcemask, smaller table, then memcmp() of source and of addr.
+ *
+ * Fields are compared explicitly rather than by returning their
+ * difference: a difference can wrap (unsigned) or overflow (signed) and
+ * then report the same sign for cmp(a, b) and cmp(b, a).
+ */
 static int
 route_cmp(const void *k1, const void *k2, void *ptr)
 {
-       return memcmp(k1, k2, sizeof(struct device_route) -
-                     offsetof(struct device_route, flags));
+       const struct device_route *r1 = k1, *r2 = k2;
+       int srccmp;
+
+       if (r1->mask != r2->mask)
+               return (r1->mask < r2->mask) ? 1 : -1;
+
+       if (r1->metric != r2->metric)
+               return (r1->metric < r2->metric) ? -1 : 1;
+
+       if (r1->flags != r2->flags)
+               return (r1->flags < r2->flags) ? 1 : -1;
+
+       if (r1->sourcemask != r2->sourcemask)
+               return (r1->sourcemask < r2->sourcemask) ? -1 : 1;
+
+       if (r1->table != r2->table)
+               return (r1->table < r2->table) ? -1 : 1;
+
+       srccmp = memcmp(&r1->source, &r2->source, sizeof(r1->source));
+       if (srccmp)
+               return srccmp;
+
+       return memcmp(&r1->addr, &r2->addr, sizeof(r1->addr));
 }
 
+/*
+ * Compare two prefix keys with memcmp() over as many bytes as lie between
+ * the members addr and pclass of struct device_prefix (pclass excluded).
+ * NOTE(review): k1/k2 are presumably pointers to the addr member of each
+ * prefix - confirm against the tree setup. ptr is unused.
+ */
 static int
 prefix_cmp(const void *k1, const void *k2, void *ptr)
 {
-       return memcmp(k1, k2, sizeof(struct device_prefix) -
+       return memcmp(k1, k2, offsetof(struct device_prefix, pclass) -
                        offsetof(struct device_prefix, addr));
 }
 
@@ -280,25 +458,46 @@ static void
 interface_handle_subnet_route(struct interface *iface, struct device_addr *addr, bool add)
 {
+       /* Installs (add) or removes (!add) the subnet route kept in
+        * addr->subnet on the interface's layer 3 device. */
        struct device *dev = iface->l3_dev.dev;
-       struct device_route route;
+       struct device_route *r = &addr->subnet;
 
-       memset(&route, 0, sizeof(route));
-       route.iface = iface;
-       route.flags = addr->flags;
-       route.mask = addr->mask;
-       memcpy(&route.addr, &addr->addr, sizeof(route.addr));
-       clear_if_addr(&route.addr, route.mask);
+       /* offlink addresses get no subnet route */
+       if (addr->flags & DEVADDR_OFFLINK)
+               return;
 
-       if (add) {
-               route.flags |= DEVADDR_KERNEL;
-               system_del_route(dev, &route);
+       if (!add) {
+               /* subnet.iface is only set on the add path below, so an
+                * unset value means there is no route to remove */
+               if (!addr->subnet.iface)
+                       return;
 
-               route.flags &= ~DEVADDR_KERNEL;
-               route.metric = iface->metric;
-               system_add_route(dev, &route);
-       } else {
-               system_del_route(dev, &route);
+               system_del_route(dev, r);
+               memset(r, 0, sizeof(*r));
+               return;
        }
+
+       /* build the route from the address with the host bits cleared */
+       r->iface = iface;
+       r->flags = addr->flags;
+       r->mask = addr->mask;
+       memcpy(&r->addr, &addr->addr, sizeof(r->addr));
+       clear_if_addr(&r->addr, r->mask);
+
+       /* first delete the variant flagged DEVADDR_KERNEL (presumably the
+        * route the kernel created on its own - confirm in
+        * system_del_route()), then add the route without that flag */
+       r->flags |= DEVADDR_KERNEL;
+       system_del_route(dev, r);
+
+       r->flags &= ~DEVADDR_KERNEL;
+       interface_set_route_info(iface, r);
+
+       system_add_route(dev, r);
+}
+
+/*
+ * Install (enabled) or remove (!enabled) the two policy rules of an
+ * address, both pointing at addr->policy_table: one matching the exact
+ * address as source and one matching the address with its mask as
+ * destination.
+ */
+static void
+interface_add_addr_rules(struct device_addr *addr, bool enabled)
+{
+       const bool is_v6 = (addr->flags & DEVADDR_FAMILY) == DEVADDR_INET6;
+       const uint8_t host_mask = is_v6 ? 128 : 32;
+
+       /* exact address, matched as source */
+       set_ip_source_policy(enabled, is_v6, IPRULE_PRIORITY_ADDR,
+                            &addr->addr, host_mask, addr->policy_table,
+                            NULL, NULL, true);
+
+       /* address with its mask, matched as destination */
+       set_ip_source_policy(enabled, is_v6, IPRULE_PRIORITY_ADDR_MASK,
+                            &addr->addr, addr->mask, addr->policy_table,
+                            NULL, NULL, false);
 }
 
 static void
@@ -310,23 +509,34 @@ interface_update_proto_addr(struct vlist_tree *tree,
        struct interface *iface;
        struct device *dev;
        struct device_addr *a_new = NULL, *a_old = NULL;
+       bool replace = false;
        bool keep = false;
+       bool v6 = false;
 
        ip = container_of(tree, struct interface_ip_settings, addr);
        iface = ip->iface;
        dev = iface->l3_dev.dev;
 
+       if (!node_new || !node_old)
+               iface->updated |= IUF_ADDRESS;
+
        if (node_new) {
                a_new = container_of(node_new, struct device_addr, node);
 
                if ((a_new->flags & DEVADDR_FAMILY) == DEVADDR_INET4 &&
                    !a_new->broadcast) {
 
-                       uint32_t mask = ~0;
-                       uint32_t *a = (uint32_t *) &a_new->addr;
-
-                       mask >>= a_new->mask;
-                       a_new->broadcast = *a | htonl(mask);
+                       /* /31 and /32 addressing need 255.255.255.255
+                        * as broadcast address. */
+                       if (a_new->mask >= 31) {
+                               a_new->broadcast = (uint32_t) ~0;
+                       } else {
+                               uint32_t mask = ~0;
+                               uint32_t *a = (uint32_t *) &a_new->addr;
+
+                               mask >>= a_new->mask;
+                               a_new->broadcast = *a | htonl(mask);
+                       }
                }
        }
 
@@ -336,28 +546,57 @@ interface_update_proto_addr(struct vlist_tree *tree,
        if (a_new && a_old) {
                keep = true;
 
-               if (a_old->flags != a_new->flags)
+               if (a_old->flags != a_new->flags || a_old->failed)
                        keep = false;
 
+               if (a_old->valid_until != a_new->valid_until ||
+                               a_old->preferred_until != a_new->preferred_until)
+                       replace = true;
+
                if ((a_new->flags & DEVADDR_FAMILY) == DEVADDR_INET4 &&
                    a_new->broadcast != a_old->broadcast)
                        keep = false;
        }
 
        if (node_old) {
-               if (!(a_old->flags & DEVADDR_EXTERNAL) && a_old->enabled && !keep) {
-                       interface_handle_subnet_route(iface, a_old, false);
-                       system_del_address(dev, a_old);
+               if (a_old->enabled && !keep) {
+                       //This is needed for source routing to work correctly. If a device
+                       //has two connections to a network using the same subnet, adding
+                       //only the network-rule will cause packets to be routed through the
+                       //first matching network (source IP matches both masks).
+                       if (a_old->policy_table)
+                               interface_add_addr_rules(a_old, false);
+
+                       if (!(a_old->flags & DEVADDR_EXTERNAL)) {
+                               interface_handle_subnet_route(iface, a_old, false);
+                               system_del_address(dev, a_old);
+                       }
                }
+               free(a_old->pclass);
                free(a_old);
        }
 
        if (node_new) {
                a_new->enabled = true;
-               if (!(a_new->flags & DEVADDR_EXTERNAL) && !keep) {
-                       system_add_address(dev, a_new);
-                       if (iface->metric)
-                               interface_handle_subnet_route(iface, a_new, true);
+
+               if ((a_new->flags & DEVADDR_FAMILY) == DEVADDR_INET6)
+                               v6 = true;
+
+               a_new->policy_table = (v6) ? iface->ip6table : iface->ip4table;
+
+               if (!keep || replace) {
+                       if (!(a_new->flags & DEVADDR_EXTERNAL)) {
+                               if (system_add_address(dev, a_new))
+                                       a_new->failed = true;
+
+                               if (iface->metric || a_new->policy_table)
+                                       interface_handle_subnet_route(iface, a_new, true);
+                       }
+
+                       if (!keep) {
+                               if (a_new->policy_table)
+                                       interface_add_addr_rules(a_new, true);
+                       }
                }
        }
 }
@@ -386,26 +625,30 @@ interface_update_proto_route(struct vlist_tree *tree,
        iface = ip->iface;
        dev = iface->l3_dev.dev;
 
+       if (!node_new || !node_old)
+               iface->updated |= IUF_ROUTE;
+
        route_old = container_of(node_old, struct device_route, node);
        route_new = container_of(node_new, struct device_route, node);
 
        if (node_old && node_new)
-               keep = !memcmp(&route_old->nexthop, &route_new->nexthop, sizeof(route_old->nexthop));
+               keep = !memcmp(&route_old->nexthop, &route_new->nexthop, sizeof(route_old->nexthop)) &&
+                       (route_old->mtu == route_new->mtu) && (route_old->type == route_new->type) &&
+                       !route_old->failed;
 
        if (node_old) {
                if (!(route_old->flags & DEVADDR_EXTERNAL) && route_old->enabled && !keep)
                        system_del_route(dev, route_old);
+
                free(route_old);
        }
 
        if (node_new) {
                bool _enabled = enable_route(ip, route_new);
 
-               if (!(route_new->flags & DEVROUTE_METRIC))
-                       route_new->metric = iface->metric;
-
                if (!(route_new->flags & DEVADDR_EXTERNAL) && !keep && _enabled)
-                       system_add_route(dev, route_new);
+                       if (system_add_route(dev, route_new))
+                               route_new->failed = true;
 
                route_new->iface = iface;
                route_new->enabled = _enabled;
@@ -432,124 +675,309 @@ interface_update_host_route(struct vlist_tree *tree,
                free(route_old);
        }
 
-       if (node_new)
-               system_add_route(dev, route_new);
+       if (node_new) {
+               if (system_add_route(dev, route_new))
+                       route_new->failed = true;
+       }
+}
+
+/*
+ * Overwrite the last two 32-bit words of addr with values from mrand48().
+ * The generator is seeded once, on the first call, from the current time
+ * and the process id.
+ */
+static void
+random_ifaceid(struct in6_addr *addr)
+{
+       static bool seeded = false;
+
+       if (!seeded) {
+               struct timeval now;
+               long int seed = 0;
+
+               gettimeofday(&now, NULL);
+               seed = now.tv_sec ^ now.tv_usec ^ getpid();
+               srand48(seed);
+               seeded = true;
+       }
+
+       addr->s6_addr32[2] = (uint32_t)mrand48();
+       addr->s6_addr32[3] = (uint32_t)mrand48();
+}
+
+/*
+ * Fill bytes 8..15 of addr from the MAC address in the settings of the
+ * interface's layer 3 device: MAC bytes 0-2, then 0xff 0xfe, then MAC
+ * bytes 3-5, with bit 0x02 of the first byte inverted.
+ */
+static void
+eui64_ifaceid(struct interface *iface, struct in6_addr *addr)
+{
+       const uint8_t *mac = iface->l3_dev.dev->settings.macaddr;
+       uint8_t *id = &addr->s6_addr[8];
+
+       id[0] = mac[0] ^ 0x02;
+       id[1] = mac[1];
+       id[2] = mac[2];
+       id[3] = 0xff;
+       id[4] = 0xfe;
+       id[5] = mac[3];
+       id[6] = mac[4];
+       id[7] = mac[5];
 }
 
+/*
+ * Write a new interface id into the last 8 bytes of addr, using the method
+ * selected by iface->assignment_iface_id_selection. Any other selection
+ * value leaves addr untouched.
+ */
+static void
+generate_ifaceid(struct interface *iface, struct in6_addr *addr)
+{
+       if (iface->assignment_iface_id_selection == IFID_FIXED) {
+               /* take the host part from assignment_fixed_iface_id */
+               memcpy(&addr->s6_addr[8],
+                      &iface->assignment_fixed_iface_id.s6_addr[8], 8);
+       } else if (iface->assignment_iface_id_selection == IFID_RANDOM) {
+               /* random last 64 bits */
+               random_ifaceid(addr);
+       } else if (iface->assignment_iface_id_selection == IFID_EUI64) {
+               /* derived from the MAC address */
+               eui64_ifaceid(iface, addr);
+       }
+}
 
+/*
+ * Apply (add) or withdraw (!add) the address of a prefix assignment on the
+ * interface's layer 3 device, together with the matching route and source
+ * policy rules. Returns immediately when iface has no layer 3 device.
+ */
 static void
-interface_set_prefix_address(struct interface *iface, bool add,
-               struct device_prefix_assignment *assignment)
+interface_set_prefix_address(struct device_prefix_assignment *assignment,
+               const struct device_prefix *prefix, struct interface *iface, bool add)
 {
-       struct interface *uplink = assignment->prefix->iface;
+       const struct interface *uplink = prefix->iface;
        if (!iface->l3_dev.dev)
                return;
 
        struct device *l3_downlink = iface->l3_dev.dev;
 
        struct device_addr addr;
+       struct device_route route;
        memset(&addr, 0, sizeof(addr));
-       addr.addr.in6 = assignment->addr;
+       memset(&route, 0, sizeof(route));
+
+       /* No address stored yet: build one from the prefix, the assigned
+        * subprefix number and a generated interface id, and remember it
+        * in the assignment. */
+       if (IN6_IS_ADDR_UNSPECIFIED(&assignment->addr)) {
+               addr.addr.in6 = prefix->addr;
+               addr.addr.in6.s6_addr32[1] |= htonl(assignment->assigned);
+               generate_ifaceid(iface, &addr.addr.in6);
+               assignment->addr = addr.addr.in6;
+       }
+       else
+               addr.addr.in6 = assignment->addr;
+
        addr.mask = assignment->length;
-       addr.flags = DEVADDR_INET6;
-       addr.preferred_until = assignment->prefix->preferred_until;
-       addr.valid_until = assignment->prefix->valid_until;
+       addr.flags = DEVADDR_INET6 | DEVADDR_OFFLINK;
+       addr.preferred_until = prefix->preferred_until;
+       addr.valid_until = prefix->valid_until;
 
-       if (!add) {
-               if (assignment->enabled)
-                       system_del_address(l3_downlink, &addr);
-       } else {
+       /* Route for the assigned subnet; its mask is never shorter than 64. */
+       route.flags = DEVADDR_INET6;
+       route.mask = addr.mask < 64 ? 64 : addr.mask;
+       route.addr = addr.addr;
+       clear_if_addr(&route.addr, route.mask);
+       interface_set_route_info(iface, &route);
+
+       if (!add && assignment->enabled) {
+               /* Withdraw: the address is not deleted but added again with
+                * its preferred lifetime ending now and its valid lifetime
+                * limited to at most 7200 seconds from now. */
+               time_t now = system_get_rtime();
+               addr.preferred_until = now;
+               if (!addr.valid_until || addr.valid_until - now > 7200)
+                       addr.valid_until = now + 7200;
+
+               if (prefix->iface) {
+                       if (prefix->iface->ip6table)
+                               set_ip_source_policy(false, true, IPRULE_PRIORITY_NW, &addr.addr,
+                                               addr.mask, prefix->iface->ip6table, iface, NULL, true);
+
+                       set_ip_source_policy(false, true, IPRULE_PRIORITY_REJECT, &addr.addr,
+                                                       addr.mask, 0, iface, "unreachable", true);
+               }
+
+               system_del_route(l3_downlink, &route);
                system_add_address(l3_downlink, &addr);
 
-               if (uplink && uplink->l3_dev.dev) {
-                       int mtu = system_update_ipv6_mtu(
-                                       uplink->l3_dev.dev, 0);
-                       if (mtu > 0)
+               assignment->enabled = false;
+       } else if (add && (iface->state == IFS_UP || iface->state == IFS_SETUP) &&
+                       !system_add_address(l3_downlink, &addr)) {
+               /* Reached only while the interface is up or in setup and
+                * system_add_address() returned 0. */
+
+               if (prefix->iface && !assignment->enabled) {
+                       set_ip_source_policy(true, true, IPRULE_PRIORITY_REJECT, &addr.addr,
+                                       addr.mask, 0, iface, "unreachable", true);
+
+                       if (prefix->iface->ip6table)
+                               set_ip_source_policy(true, true, IPRULE_PRIORITY_NW, &addr.addr,
+                                               addr.mask, prefix->iface->ip6table, iface, NULL, true);
+               }
+
+               route.metric = iface->metric;
+               system_add_route(l3_downlink, &route);
+
+               /* NOTE(review): system_update_ipv6_mtu(dev, 0) presumably
+                * just reports the current MTU - confirm. The downlink MTU
+                * is only ever lowered to the uplink's value, and not at all
+                * when DEV_OPT_MTU6 is set on the downlink. */
+               if (uplink && uplink->l3_dev.dev && !(l3_downlink->settings.flags & DEV_OPT_MTU6)) {
+                       int mtu = system_update_ipv6_mtu(uplink->l3_dev.dev, 0);
+                       int mtu_old = system_update_ipv6_mtu(l3_downlink, 0);
+
+                       if (mtu > 0 && mtu_old > mtu)
                                system_update_ipv6_mtu(l3_downlink, mtu);
                }
+
+               assignment->enabled = true;
        }
-       assignment->enabled = add;
 }
 
+/*
+ * Try to place assign in the assignment list. The list is walked front to
+ * back and assign is linked in before the first entry whose assigned value
+ * lies above the end of the wanted range. With assign->assigned == -1 the
+ * next suitably aligned free value is tried at each entry and stored in
+ * assign->assigned on success; otherwise the requested value is used as
+ * is. Returns true when assign was inserted, false if no gap was found.
+ */
+static bool interface_prefix_assign(struct list_head *list,
+               struct device_prefix_assignment *assign)
+{
+       const int32_t span = (1 << (64 - assign->length)) - 1;
+       int32_t next_free = 0;
+       struct device_prefix_assignment *cur;
+
+       list_for_each_entry(cur, list, head) {
+               bool fits;
+
+               if (assign->assigned != -1) {
+                       /* fixed request: must start at or after the free mark */
+                       fits = assign->assigned >= next_free &&
+                              assign->assigned + span < cur->assigned;
+               } else {
+                       /* no request: round the free mark up to the alignment */
+                       next_free = (next_free + span) & (~span);
+                       fits = next_free + span < cur->assigned;
+                       if (fits)
+                               assign->assigned = next_free;
+               }
+
+               if (fits) {
+                       list_add_tail(&assign->head, &cur->head);
+                       return true;
+               }
+
+               /* continue behind the range occupied by this entry */
+               next_free = (cur->assigned + (1 << (64 - cur->length)));
+       }
+
+       return false;
+}
 
-static void
-interface_update_prefix_assignments(struct vlist_tree *tree,
-                            struct vlist_node *node_new,
-                            struct vlist_node *node_old)
+static void interface_update_prefix_assignments(struct device_prefix *prefix, bool setup)
 {
-       struct device_prefix_assignment *old, *new;
-       old = container_of(node_old, struct device_prefix_assignment, node);
-       new = container_of(node_new, struct device_prefix_assignment, node);
+       struct device_prefix_assignment *c;
+       struct interface *iface;
 
-       // Assignments persist across interface reloads etc.
-       // so use indirection to avoid dangling pointers
-       struct interface *iface = vlist_find(&interfaces,
-                       (node_new) ? new->name : old->name, iface, node);
+       // Delete all assignments
+       while (!list_empty(&prefix->assignments)) {
+               c = list_first_entry(&prefix->assignments,
+                               struct device_prefix_assignment, head);
+               if ((iface = vlist_find(&interfaces, c->name, iface, node)))
+                       interface_set_prefix_address(c, prefix, iface, false);
+               list_del(&c->head);
+               free(c);
+       }
 
-       if (node_old && node_new) {
-               new->addr = old->addr;
-               new->length = old->length;
-       } else if (node_old) {
-               if (iface)
-                       interface_set_prefix_address(iface, false, old);
-               free(old->name);
-               free(old);
-       } else if (node_new) {
-               struct device_prefix *prefix = new->prefix;
-               uint64_t want = 1ULL << (64 - new->length);
-               prefix->avail &= ~(want - 1);
-               prefix->avail -= want;
+       if (!setup)
+               return;
 
-               // Invert assignment
-               uint64_t assigned = ~prefix->avail;
-               assigned &= (1ULL << (64 - prefix->length)) - 1;
-               assigned &= ~(want - 1);
+       // End-of-assignment sentinel
+       c = malloc(sizeof(*c) + 1);
+       if (!c)
+               return;
 
-               // Assignment
-               new->addr = prefix->addr;
-               new->addr.s6_addr32[0] |=
-                               htonl(assigned >> 32);
-               new->addr.s6_addr32[1] |=
-                               htonl(assigned & 0xffffffffU);
-               new->addr.s6_addr[15] += 1;
+       c->assigned = 1 << (64 - prefix->length);
+       c->length = 64;
+       c->name[0] = 0;
+       c->addr = in6addr_any;
+       list_add(&c->head, &prefix->assignments);
+
+       // Excluded prefix
+       if (prefix->excl_length > 0) {
+               const char name[] = "!excluded";
+               c = malloc(sizeof(*c) + sizeof(name));
+               if (c) {
+                       c->assigned = ntohl(prefix->excl_addr.s6_addr32[1]) &
+                                       ((1 << (64 - prefix->length)) - 1);
+                       c->length = prefix->excl_length;
+                       c->addr = in6addr_any;
+                       memcpy(c->name, name, sizeof(name));
+                       list_add(&c->head, &prefix->assignments);
+               }
        }
 
-       if (node_new && (iface->state == IFS_UP || iface->state == IFS_SETUP))
-               interface_set_prefix_address(iface, true, new);
-}
+       bool assigned_any = false;
+       struct list_head assign_later = LIST_HEAD_INIT(assign_later);
+       vlist_for_each_element(&interfaces, iface, node) {
+               if (iface->assignment_length < 48 ||
+                               iface->assignment_length > 64)
+                       continue;
 
+               // Test whether there is a matching class
+               if (!list_empty(&iface->assignment_classes)) {
+                       bool found = false;
 
-void
-interface_ip_set_prefix_assignment(struct device_prefix *prefix,
-               struct interface *iface, uint8_t length)
-{
-       if (!length || length > 64) {
-               struct device_prefix_assignment *assignment = vlist_find(
-                               prefix->assignments, &iface, assignment, node);
-               if (assignment)
-                       interface_set_prefix_address(iface, false, assignment);
-       } else {
-               uint8_t length = iface->proto_ip.assignment_length;
-               uint64_t want = 1ULL << (64 - length);
-               if (prefix->avail < want && prefix->avail > 0) {
-                       do {
-                               want = 1ULL << (64 - ++length);
-                       } while (want > prefix->avail);
+                       struct interface_assignment_class *c;
+                       list_for_each_entry(c, &iface->assignment_classes, head) {
+                               if (!strcmp(c->name, prefix->pclass)) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       if (!found)
+                               continue;
                }
 
-               if (prefix->avail < want)
-                       return;
+               size_t namelen = strlen(iface->name) + 1;
+               c = malloc(sizeof(*c) + namelen);
+               if (!c)
+                       continue;
 
-               // Assignment
-               struct device_prefix_assignment *assignment = calloc(1, sizeof(*assignment));
-               assignment->prefix = prefix;
-               assignment->length = length;
-               assignment->name = strdup(iface->name);
+               c->length = iface->assignment_length;
+               c->assigned = iface->assignment_hint;
+               c->addr = in6addr_any;
+               c->enabled = false;
+               memcpy(c->name, iface->name, namelen);
+
+               // First process all custom assignments, put all others in later-list
+               if (c->assigned == -1 || !interface_prefix_assign(&prefix->assignments, c)) {
+                       if (c->assigned != -1) {
+                               c->assigned = -1;
+                               netifd_log_message(L_WARNING, "Failed to assign requested subprefix "
+                                               "of size %hhu for %s, trying other\n", c->length, c->name);
+                       }
+
+                       struct list_head *next = &assign_later;
+                       struct device_prefix_assignment *n;
+                       list_for_each_entry(n, &assign_later, head) {
+                               if (n->length < c->length) {
+                                       next = &n->head;
+                                       break;
+                               }
+                       }
+                       list_add_tail(&c->head, next);
+               }
 
-               vlist_add(prefix->assignments, &assignment->node, assignment->name);
+               if (c->assigned != -1)
+                       assigned_any = true;
        }
+
+       // Then try to assign all other + failed custom assignments
+       while (!list_empty(&assign_later)) {
+               c = list_first_entry(&assign_later, struct device_prefix_assignment, head);
+               list_del(&c->head);
+
+               bool assigned = false;
+               do {
+                       assigned = interface_prefix_assign(&prefix->assignments, c);
+               } while (!assigned && ++c->length <= 64);
+
+               if (!assigned) {
+                       netifd_log_message(L_WARNING, "Failed to assign subprefix "
+                                       "of size %hhu for %s\n", c->length, c->name);
+                       free(c);
+               } else {
+                       assigned_any = true;
+               }
+       }
+
+       list_for_each_entry(c, &prefix->assignments, head)
+               if ((iface = vlist_find(&interfaces, c->name, iface, node)))
+                       interface_set_prefix_address(c, prefix, iface, true);
+
+       if (!assigned_any)
+               netifd_log_message(L_WARNING, "You have delegated IPv6-prefixes but haven't assigned them "
+                               "to any interface. Did you forget to set option ip6assign on your lan-interfaces?");
 }
 
+
+void interface_refresh_assignments(bool hint)
+{
+       /* Hint from the previous call; prefixes are reassigned once it is withdrawn. */
+       static bool refresh = false;
+       struct device_prefix *p;
+       if (refresh && !hint)
+               list_for_each_entry(p, &prefixes, head)
+                       interface_update_prefix_assignments(p, true);
+       refresh = hint;
+}
+
+
 static void
 interface_update_prefix(struct vlist_tree *tree,
                             struct vlist_node *node_new,
@@ -559,6 +987,10 @@ interface_update_prefix(struct vlist_tree *tree,
        prefix_old = container_of(node_old, struct device_prefix, node);
        prefix_new = container_of(node_new, struct device_prefix, node);
 
+       struct interface_ip_settings *ip = container_of(tree, struct interface_ip_settings, prefix);
+       if (tree && (!node_new || !node_old))
+               ip->iface->updated |= IUF_PREFIX;
+
        struct device_route route;
        memset(&route, 0, sizeof(route));
        route.flags = DEVADDR_INET6;
@@ -566,89 +998,104 @@ interface_update_prefix(struct vlist_tree *tree,
        route.mask = (node_new) ? prefix_new->length : prefix_old->length;
        route.addr.in6 = (node_new) ? prefix_new->addr : prefix_old->addr;
 
-       if (node_old && node_new) {
-               prefix_new->avail = prefix_old->avail;
-               prefix_new->assignments = prefix_old->assignments;
-               prefix_old->assignments = NULL;
-
-               // Update all assignments
-               struct device_prefix_assignment *assignment;
-               struct vlist_tree *assignments = prefix_new->assignments;
-               vlist_for_each_element(assignments, assignment, node)
-                       assignments->update(assignments,
-                                       &assignment->node, &assignment->node);
-       } else if (node_new) {
-               prefix_new->avail = 1ULL << (64 - prefix_new->length);
-               prefix_new->assignments = calloc(1, sizeof(*prefix_new->assignments));
-               vlist_init(prefix_new->assignments, avl_strcmp,
-                               interface_update_prefix_assignments);
 
-               // Create initial assignments for interfaces
-               struct interface *iface;
-               vlist_for_each_element(&interfaces, iface, node)
-                       interface_ip_set_prefix_assignment(prefix_new, iface,
-                                       iface->proto_ip.assignment_length);
+       struct device_prefix_assignment *c;
+       struct interface *iface;
 
-               list_add(&prefix_new->head, &prefixes);
+       if (node_old && node_new) {
+               // Move assignments and refresh addresses to update valid times
+               list_splice(&prefix_old->assignments, &prefix_new->assignments);
 
+               list_for_each_entry(c, &prefix_new->assignments, head)
+                       if ((iface = vlist_find(&interfaces, c->name, iface, node)))
+                               interface_set_prefix_address(c, prefix_new, iface, true);
+       } else if (node_new) {
                // Set null-route to avoid routing loops
                system_add_route(NULL, &route);
-       }
 
-       if (node_old) {
+               if (!prefix_new->iface || !prefix_new->iface->proto_ip.no_delegation)
+                       interface_update_prefix_assignments(prefix_new, true);
+       } else if (node_old) {
                // Remove null-route
+               interface_update_prefix_assignments(prefix_old, false);
                system_del_route(NULL, &route);
+       }
 
-               list_del(&prefix_old->head);
-
-               if (prefix_old->assignments) {
-                       vlist_flush_all(prefix_old->assignments);
-                       free(prefix_old->assignments);
-               }
+       if (node_old) {
+               if (prefix_old->head.next)
+                       list_del(&prefix_old->head);
                free(prefix_old);
        }
+
+       if (node_new && (!prefix_new->iface || !prefix_new->iface->proto_ip.no_delegation))
+               list_add(&prefix_new->head, &prefixes);
+
 }
 
-void
+struct device_prefix* /* returns the new prefix, or NULL when allocation fails */
 interface_ip_add_device_prefix(struct interface *iface, struct in6_addr *addr,
-               uint8_t length, time_t valid_until, time_t preferred_until)
+               uint8_t length, time_t valid_until, time_t preferred_until,
+               struct in6_addr *excl_addr, uint8_t excl_length, const char *pclass)
 {
-       struct device_prefix *prefix = calloc(1, sizeof(*prefix));
+       if (!pclass) /* default class: the owning interface's name, else "local" */
+               pclass = (iface) ? iface->name : "local";
+       /* Over-allocate by strlen(pclass) + 1 bytes for the strcpy() into prefix->pclass below. */
+       struct device_prefix *prefix = calloc(1, sizeof(*prefix) + strlen(pclass) + 1);
+       if (!prefix)
+               return NULL;
+       /* calloc() zeroed the struct, so fields not assigned below stay 0. */
        prefix->length = length;
        prefix->addr = *addr;
        prefix->preferred_until = preferred_until;
        prefix->valid_until = valid_until;
        prefix->iface = iface;
+       INIT_LIST_HEAD(&prefix->assignments);
+       /* The excluded sub-prefix is optional and only recorded when excl_addr is given. */
+       if (excl_addr) {
+               prefix->excl_addr = *excl_addr;
+               prefix->excl_length = excl_length;
+       }
+       /* NOTE(review): presumably pclass is the trailing flexible array member - confirm in the struct. */
+       strcpy(prefix->pclass, pclass);
 
        if (iface)
                vlist_add(&iface->proto_ip.prefix, &prefix->node, &prefix->addr);
        else
                interface_update_prefix(NULL, &prefix->node, NULL);
+       /* Hand the prefix back so callers such as the ULA code can keep a pointer to it. */
+       return prefix;
 }
 
 void
 interface_ip_set_ula_prefix(const char *prefix)
 {
        char buf[INET6_ADDRSTRLEN + 4] = {0}, *saveptr;
-       strncpy(buf, prefix, sizeof(buf) - 1);
+       if (prefix) /* NULL keeps buf empty, so parsing fails below and any ULA prefix is removed */
+               strncpy(buf, prefix, sizeof(buf) - 1); /* last byte stays 0 from the initializer */
        char *prefixaddr = strtok_r(buf, "/", &saveptr);
 
        struct in6_addr addr;
-       if (!prefixaddr || inet_pton(AF_INET6, prefixaddr, &addr) < 1)
+       if (!prefixaddr || inet_pton(AF_INET6, prefixaddr, &addr) < 1) { /* no usable address */
+               if (ula_prefix) {
+                       interface_update_prefix(NULL, NULL, &ula_prefix->node); /* old node only: removal */
+                       ula_prefix = NULL; /* interface_update_prefix() freed the old prefix */
+               }
                return;
+       }
 
        int length;
        char *prefixlen = strtok_r(NULL, ",", &saveptr);
        if (!prefixlen || (length = atoi(prefixlen)) < 1 || length > 64)
                return;
 
-       if (ula_prefix && (!IN6_ARE_ADDR_EQUAL(&addr, &ula_prefix->addr) ||
-                       ula_prefix->length != length)) {
-               interface_update_prefix(NULL, NULL, &ula_prefix->node);
-               ula_prefix = NULL;
-       }
+       if (!ula_prefix || !IN6_ARE_ADDR_EQUAL(&addr, &ula_prefix->addr) || /* skip if unchanged */
+                       ula_prefix->length != length) {
+               if (ula_prefix)
+                       interface_update_prefix(NULL, NULL, &ula_prefix->node); /* drop the previous ULA first */
 
-       interface_ip_add_device_prefix(NULL, &addr, length, 0, 0);
+               ula_prefix = interface_ip_add_device_prefix(NULL, &addr, length,
+                               0, 0, NULL, 0, NULL); /* NULL again if allocation fails */
+       }
 }
 
 void
@@ -727,19 +1174,22 @@ interface_add_dns_search_list(struct interface_ip_settings *ip, struct blob_attr
 }
 
 static void
-write_resolv_conf_entries(FILE *f, struct interface_ip_settings *ip)
+write_resolv_conf_entries(FILE *f, struct interface_ip_settings *ip, const char *dev)
 {
        struct dns_server *s;
        struct dns_search_domain *d;
        const char *str;
-       char buf[32];
+       char buf[INET6_ADDRSTRLEN];
 
        vlist_simple_for_each_element(&ip->dns_servers, s, node) {
                str = inet_ntop(s->af, &s->addr, buf, sizeof(buf));
                if (!str)
                        continue;
 
-               fprintf(f, "nameserver %s\n", str);
+               if (s->af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&s->addr.in6))
+                       fprintf(f, "nameserver %s%%%s\n", str, dev);
+               else
+                       fprintf(f, "nameserver %s\n", str);
        }
 
        vlist_simple_for_each_element(&ip->dns_search, d, node) {
@@ -753,10 +1203,11 @@ interface_write_resolv_conf(void)
        struct interface *iface;
        char *path = alloca(strlen(resolv_conf) + 5);
        FILE *f;
+       uint32_t crcold, crcnew;
 
        sprintf(path, "%s.tmp", resolv_conf);
        unlink(path);
-       f = fopen(path, "w");
+       f = fopen(path, "w+");
        if (!f) {
                D(INTERFACE, "Failed to open %s for writing\n", path);
                return;
@@ -773,12 +1224,25 @@ interface_write_resolv_conf(void)
                        continue;
 
                fprintf(f, "# Interface %s\n", iface->name);
-               write_resolv_conf_entries(f, &iface->config_ip);
+               write_resolv_conf_entries(f, &iface->config_ip, iface->ifname);
                if (!iface->proto_ip.no_dns)
-                       write_resolv_conf_entries(f, &iface->proto_ip);
+                       write_resolv_conf_entries(f, &iface->proto_ip, iface->ifname);
        }
+       fflush(f);
+       rewind(f);
+       crcnew = crc32_file(f);
        fclose(f);
-       if (rename(path, resolv_conf) < 0) {
+
+       crcold = crcnew + 1;
+       f = fopen(resolv_conf, "r");
+       if (f) {
+               crcold = crc32_file(f);
+               fclose(f);
+       }
+
+       if (crcold == crcnew) {
+               unlink(path);
+       } else if (rename(path, resolv_conf) < 0) {
                D(INTERFACE, "Failed to replace %s\n", resolv_conf);
                unlink(path);
        }
@@ -789,26 +1253,48 @@ void interface_ip_set_enabled(struct interface_ip_settings *ip, bool enabled)
        struct device_addr *addr;
        struct device_route *route;
        struct device *dev;
+       struct interface *iface;
 
        ip->enabled = enabled;
-       dev = ip->iface->l3_dev.dev;
+       iface = ip->iface;
+       dev = iface->l3_dev.dev;
        if (!dev)
                return;
 
        vlist_for_each_element(&ip->addr, addr, node) {
+               bool v6 = ((addr->flags & DEVADDR_FAMILY) == DEVADDR_INET6) ? true : false;
+
+               if (addr->flags & DEVADDR_EXTERNAL)
+                       continue;
+
                if (addr->enabled == enabled)
                        continue;
 
-               if (enabled)
+               if (enabled) {
                        system_add_address(dev, addr);
-               else
+
+                       addr->policy_table = (v6) ? iface->ip6table : iface->ip4table;
+                       if (iface->metric || addr->policy_table)
+                               interface_handle_subnet_route(iface, addr, true);
+
+                       if (addr->policy_table)
+                               interface_add_addr_rules(addr, true);
+               } else {
+                       interface_handle_subnet_route(iface, addr, false);
                        system_del_address(dev, addr);
+
+                       if (addr->policy_table)
+                               interface_add_addr_rules(addr, false);
+               }
                addr->enabled = enabled;
        }
 
        vlist_for_each_element(&ip->route, route, node) {
                bool _enabled = enabled;
 
+               if (route->flags & DEVADDR_EXTERNAL)
+                       continue;
+
                if (!enable_route(ip, route))
                        _enabled = false;
 
@@ -816,14 +1302,31 @@ void interface_ip_set_enabled(struct interface_ip_settings *ip, bool enabled)
                        continue;
 
                if (_enabled) {
-                       if (!(route->flags & DEVROUTE_METRIC))
-                               route->metric = ip->iface->metric;
+                       interface_set_route_info(ip->iface, route);
 
-                       system_add_route(dev, route);
+                       if (system_add_route(dev, route))
+                               route->failed = true;
                } else
                        system_del_route(dev, route);
                route->enabled = _enabled;
        }
+
+       struct device_prefix *c;
+       struct device_prefix_assignment *a;
+       list_for_each_entry(c, &prefixes, head)
+               list_for_each_entry(a, &c->assignments, head)
+                       if (!strcmp(a->name, ip->iface->name))
+                               interface_set_prefix_address(a, c, ip->iface, enabled);
+
+       if (ip->iface && ip->iface->policy_rules_set != enabled &&
+           ip->iface->l3_dev.dev) {
+               set_ip_lo_policy(enabled, true, ip->iface);
+               set_ip_lo_policy(enabled, false, ip->iface);
+
+               set_ip_source_policy(enabled, true, IPRULE_PRIORITY_REJECT + ip->iface->l3_dev.dev->ifindex,
+                       NULL, 0, 0, ip->iface, "failed_policy", true);
+               ip->iface->policy_rules_set = enabled;
+       }
 }
 
 void
@@ -880,3 +1383,40 @@ interface_ip_init(struct interface *iface)
        __interface_ip_init(&iface->config_ip, iface);
        vlist_init(&iface->host_routes, route_cmp, interface_update_host_route);
 }
+
+static void
+interface_ip_valid_until_handler(struct uloop_timeout *t)
+{
+       /* Purge expired proto_ip entries of every IFS_UP interface, then re-arm t. */
+       struct interface *iface;
+       struct device_addr *ad, *ad_tmp;
+       struct device_route *rt, *rt_tmp;
+       struct device_prefix *pfx, *pfx_tmp;
+       time_t now = system_get_rtime();
+
+       vlist_for_each_element(&interfaces, iface, node) {
+               struct interface_ip_settings *ip = &iface->proto_ip;
+
+               if (iface->state != IFS_UP)
+                       continue;
+               /* A valid_until of 0 is skipped: such entries are never deleted here. */
+               vlist_for_each_element_safe(&ip->addr, ad, node, ad_tmp)
+                       if (ad->valid_until != 0 && now > ad->valid_until)
+                               vlist_delete(&ip->addr, &ad->node);
+               vlist_for_each_element_safe(&ip->route, rt, node, rt_tmp)
+                       if (rt->valid_until != 0 && now > rt->valid_until)
+                               vlist_delete(&ip->route, &rt->node);
+               vlist_for_each_element_safe(&ip->prefix, pfx, node, pfx_tmp)
+                       if (pfx->valid_until != 0 && now > pfx->valid_until)
+                               vlist_delete(&ip->prefix, &pfx->node);
+       }
+       /* Re-arm the timeout for the next pass. */
+       uloop_timeout_set(t, 1000);
+}
+
+static void __init interface_ip_init_worker(void)
+{
+       /* Hook up the valid_until sweep and schedule its first run. */
+       valid_until_timeout.cb = interface_ip_valid_until_handler;
+       uloop_timeout_set(&valid_until_timeout, 1000);
+}