+ system_fd_set_cloexec(sock_ioctl);
+
+ // Prepare socket for routing / address control
+ sock_rtnl = create_socket(NETLINK_ROUTE, 0);
+ if (!sock_rtnl)
+ return -1;
+
+ if (!create_event_socket(&rtnl_event, NETLINK_ROUTE, cb_rtnl_event))
+ return -1;
+
+ if (!create_raw_event_socket(&hotplug_event, NETLINK_KOBJECT_UEVENT, 1,
+ handle_hotplug_event, 0))
+ return -1;
+
+ // Receive network link events from the kernel
+ nl_socket_add_membership(rtnl_event.sock, RTNLGRP_LINK);
+
+ return 0;
+}
+
+/*
+ * Write the string val to the file at path.
+ *
+ * Best effort: nothing is reported if the file cannot be opened or the
+ * write fails.
+ */
+static void system_set_sysctl(const char *path, const char *val)
+{
+	int fd = open(path, O_WRONLY);
+
+	if (fd >= 0) {
+		/* Empty body: the result of write() is deliberately discarded */
+		if (write(fd, val, strlen(val))) {}
+		close(fd);
+	}
+}
+
+/*
+ * Build a file name by formatting device into the printf-style template
+ * path (result stored in the file-level dev_buf) and write val to it.
+ */
+static void system_set_dev_sysctl(const char *path, const char *device, const char *val)
+{
+	const char *target = dev_buf;
+
+	snprintf(dev_buf, sizeof(dev_buf), path, device);
+	system_set_sysctl(target, val);
+}
+
+/* Write val to the "disable_ipv6" entry of dev under /proc/sys/net/ipv6/conf. */
+static void system_set_disable_ipv6(struct device *dev, const char *val)
+{
+	static const char sysctl_fmt[] = "/proc/sys/net/ipv6/conf/%s/disable_ipv6";
+
+	system_set_dev_sysctl(sysctl_fmt, dev->ifname, val);
+}
+
+/*
+ * Read the contents of the file at path into buf as a NUL-terminated string.
+ *
+ * At most buf_sz - 1 bytes are fetched, using a single read() call.
+ *
+ * Returns 0 on success, or -1 if buf cannot hold even the terminator or the
+ * file cannot be opened or read.
+ */
+static int system_get_sysctl(const char *path, char *buf, const size_t buf_sz)
+{
+	ssize_t len;
+	int fd;
+
+	/* buf_sz - 1 would wrap around for an empty buffer */
+	if (!buf || buf_sz == 0)
+		return -1;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	len = read(fd, buf, buf_sz - 1);
+	close(fd);
+	if (len < 0)
+		return -1;
+
+	buf[len] = 0;
+	return 0;
+}
+
+/*
+ * Format device into the printf-style template path (result stored in the
+ * file-level dev_buf) and read that file into buf via system_get_sysctl().
+ * Returns whatever system_get_sysctl() returns.
+ */
+static int
+system_get_dev_sysctl(const char *path, const char *device, char *buf, const size_t buf_sz)
+{
+	int ret;
+
+	snprintf(dev_buf, sizeof(dev_buf), path, device);
+	ret = system_get_sysctl(dev_buf, buf, buf_sz);
+
+	return ret;
+}
+
+/*
+ * Read the "disable_ipv6" entry of dev under /proc/sys/net/ipv6/conf into
+ * buf. Returns the result of system_get_dev_sysctl().
+ */
+static int system_get_disable_ipv6(struct device *dev, char *buf, const size_t buf_sz)
+{
+	static const char sysctl_fmt[] = "/proc/sys/net/ipv6/conf/%s/disable_ipv6";
+
+	return system_get_dev_sysctl(sysctl_fmt, dev->ifname, buf, buf_sz);
+}
+
+/*
+ * Fallback definition for builds whose headers do not provide IFF_LOWER_UP;
+ * the flag is tested against ifi_flags in cb_rtnl_event().
+ */
+#ifndef IFF_LOWER_UP
+#define IFF_LOWER_UP 0x10000
+#endif
+
+/*
+ * Netlink callback evaluating link messages.
+ *
+ * For an RTM_NEWLINK message whose IFLA_IFNAME names a device returned by
+ * device_get(), update that device's ifindex and its link state (derived
+ * from the IFF_LOWER_UP flag). Every other message is ignored.
+ *
+ * Always returns 0.
+ */
+static int cb_rtnl_event(struct nl_msg *msg, void *arg)
+{
+	struct nlmsghdr *nh = nlmsg_hdr(msg);
+	struct ifinfomsg *ifi = NLMSG_DATA(nh);
+	struct nlattr *nla[__IFLA_MAX];
+	struct device *dev;
+
+	if (nh->nlmsg_type != RTM_NEWLINK)
+		goto out;
+
+	/* nla[] is only valid when parsing succeeded */
+	if (nlmsg_parse(nh, sizeof(*ifi), nla, __IFLA_MAX - 1, NULL) < 0)
+		goto out;
+
+	if (!nla[IFLA_IFNAME])
+		goto out;
+
+	dev = device_get(nla_data(nla[IFLA_IFNAME]), false);
+	if (!dev)
+		goto out;
+
+	device_set_ifindex(dev, ifi->ifi_index);
+	device_set_link(dev, ifi->ifi_flags & IFF_LOWER_UP ? true : false);
+
+out:
+	return 0;
+}
+
+/*
+ * Parse one uevent message and update the presence of the matching device.
+ *
+ * data holds size bytes: a header starting with "add@" or "remove@",
+ * followed by NUL-separated KEY=VALUE strings. The buffer is modified in
+ * place (the '=' of each examined pair is overwritten with NUL).
+ *
+ * Only events carrying SUBSYSTEM=net and an INTERFACE are acted upon, and
+ * only when device_get() knows the interface and it is of
+ * simple_device_type. An "add" is ignored when system_if_force_external()
+ * reports true for the interface.
+ */
+static void
+handle_hotplug_msg(char *data, int size)
+{
+	const char *subsystem = NULL, *interface = NULL;
+	char *cur, *end, *sep;
+	struct device *dev;
+	int skip;
+	bool add;
+
+	/*
+	 * Every string function below relies on a terminator inside the
+	 * received buffer; refuse messages that do not end in NUL.
+	 */
+	if (size <= 0 || data[size - 1] != '\0')
+		return;
+
+	if (!strncmp(data, "add@", 4))
+		add = true;
+	else if (!strncmp(data, "remove@", 7))
+		add = false;
+	else
+		return;
+
+	/* Step over the header string to reach the first KEY=VALUE pair */
+	skip = strlen(data) + 1;
+	end = data + size;
+
+	for (cur = data + skip; cur < end; cur += skip) {
+		/* Length must be taken before '=' is replaced by NUL */
+		skip = strlen(cur) + 1;
+
+		sep = strchr(cur, '=');
+		if (!sep)
+			continue;
+
+		*sep = 0;
+		if (!strcmp(cur, "INTERFACE"))
+			interface = sep + 1;
+		else if (!strcmp(cur, "SUBSYSTEM")) {
+			subsystem = sep + 1;
+			if (strcmp(subsystem, "net") != 0)
+				return;
+		}
+		if (subsystem && interface)
+			goto found;
+	}
+	return;
+
+found:
+	dev = device_get(interface, false);
+	if (!dev)
+		return;
+
+	if (dev->type != &simple_device_type)
+		return;
+
+	if (add && system_if_force_external(dev->ifname))
+		return;
+
+	device_set_present(dev, add);
+}
+
+/*
+ * uloop callback for the hotplug event socket: receive messages until
+ * nl_recv() returns zero or an error. Messages whose sender nl_pid is 0 are
+ * handed to handle_hotplug_msg(); all others are dropped. Each received
+ * buffer is freed here.
+ */
+static void
+handle_hotplug_event(struct uloop_fd *u, unsigned int events)
+{
+	struct event_socket *ev = container_of(u, struct event_socket, uloop);
+	struct sockaddr_nl nla;
+	unsigned char *buf = NULL;
+	int size;
+
+	for (;;) {
+		size = nl_recv(ev->sock, &nla, &buf, NULL);
+		if (size <= 0)
+			break;
+
+		if (nla.nl_pid == 0)
+			handle_hotplug_msg((char *) buf, size);
+
+		free(buf);
+	}
+}
+
+/*
+ * Send msg on the routing netlink socket and wait for the acknowledgement.
+ *
+ * msg is always freed, whether or not sending succeeds. Returns the negative
+ * send error, otherwise the result of nl_wait_for_ack().
+ */
+static int system_rtnl_call(struct nl_msg *msg)
+{
+	int err = nl_send_auto_complete(sock_rtnl, msg);
+
+	nlmsg_free(msg);
+
+	return err < 0 ? err : nl_wait_for_ack(sock_rtnl);
+}
+
+/* Delete the bridge named bridge->ifname; returns the SIOCBRDELBR ioctl result. */
+int system_bridge_delbr(struct device *bridge)
+{
+	const char *brname = bridge->ifname;
+
+	return ioctl(sock_ioctl, SIOCBRDELBR, brname);
+}
+
+/*
+ * Issue ioctl cmd on the bridge interface named bridge.
+ *
+ * When dev is given, its ifindex is passed in the request; otherwise data is
+ * passed as the request's data pointer. Returns the ioctl result.
+ */
+static int system_bridge_if(const char *bridge, struct device *dev, int cmd, void *data)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	if (dev)
+		ifr.ifr_ifindex = dev->ifindex;
+	else
+		ifr.ifr_data = data;
+	/* ifr is zeroed, so copying one byte less keeps ifr_name terminated */
+	strncpy(ifr.ifr_name, bridge, sizeof(ifr.ifr_name) - 1);
+	return ioctl(sock_ioctl, cmd, &ifr);
+}
+
+/*
+ * Report whether /sys/devices/virtual/net/<name>/bridge exists.
+ *
+ * buf (buflen bytes) is used as scratch space and holds the probed path
+ * afterwards.
+ */
+static bool system_is_bridge(const char *name, char *buf, int buflen)
+{
+	struct stat st;
+
+	snprintf(buf, buflen, "/sys/devices/virtual/net/%s/bridge", name);
+
+	return stat(buf, &st) >= 0;
+}
+
+/*
+ * Find the bridge that interface name is a member of.
+ *
+ * Looks for /sys/devices/virtual/net/<any>/brif/<name>/bridge, resolves the
+ * first match with readlink() into buf and returns a pointer (into buf) to
+ * the last path component of the link target.
+ *
+ * Returns NULL if there is no match, the link cannot be read, or the target
+ * contains no '/'. buf is used as scratch space in every case.
+ */
+static char *system_get_bridge(const char *name, char *buf, int buflen)
+{
+	char *path = NULL;
+	ssize_t len;
+	glob_t gl;
+
+	if (buflen <= 0)
+		return NULL;
+
+	snprintf(buf, buflen, "/sys/devices/virtual/net/*/brif/%s/bridge", name);
+	/* glob() signals failure (including "no match") with a non-zero value */
+	if (glob(buf, GLOB_NOSORT, NULL, &gl) != 0)
+		goto out;
+
+	if (gl.gl_pathc == 0)
+		goto out;
+
+	/* readlink() does not terminate its result: keep one byte in reserve */
+	len = readlink(gl.gl_pathv[0], buf, buflen - 1);
+	if (len < 0)
+		goto out;
+
+	buf[len] = 0;
+	path = strrchr(buf, '/');
+	if (path)
+		path++;
+
+out:
+	/* Release the match list on every path */
+	globfree(&gl);
+	return path;
+}
+
+/*
+ * Add dev to bridge.
+ *
+ * Returns 0 without touching anything if system_get_bridge() reports that
+ * dev already belongs to this bridge; otherwise returns the result of the
+ * SIOCBRADDIF request.
+ */
+int system_bridge_addif(struct device *bridge, struct device *dev)
+{
+	char *oldbr = system_get_bridge(dev->ifname, dev_buf, sizeof(dev_buf));
+
+	if (oldbr && strcmp(oldbr, bridge->ifname) == 0)
+		return 0;
+
+	return system_bridge_if(bridge->ifname, dev, SIOCBRADDIF, NULL);
+}
+
+/* Remove dev from bridge; returns the result of the SIOCBRDELIF request. */
+int system_bridge_delif(struct device *bridge, struct device *dev)
+{
+	const char *brname = bridge->ifname;
+
+	return system_bridge_if(brname, dev, SIOCBRDELIF, NULL);
+}
+
+/*
+ * Look up the interface index of dev->ifname with SIOCGIFINDEX.
+ * Returns the index, or 0 if the ioctl fails.
+ */
+static int system_if_resolve(struct device *dev)
+{
+	struct ifreq ifr;
+
+	/* Zero the request so no stack garbage is handed to the kernel */
+	memset(&ifr, 0, sizeof(ifr));
+	/* One byte less than the field keeps the name terminated */
+	strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(sock_ioctl, SIOCGIFINDEX, &ifr) != 0)
+		return 0;
+
+	return ifr.ifr_ifindex;
+}
+
+/*
+ * Change the interface flags of ifname: set the bits in add, then clear the
+ * bits in rem.
+ *
+ * Returns the result of the SIOCSIFFLAGS ioctl, or -1 if the current flags
+ * cannot be read (in which case nothing is written).
+ */
+static int system_if_flags(const char *ifname, unsigned add, unsigned rem)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	/* ifr is zeroed, so copying one byte less keeps ifr_name terminated */
+	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
+
+	/* Never write back flags derived from a request that was not filled in */
+	if (ioctl(sock_ioctl, SIOCGIFFLAGS, &ifr) < 0)
+		return -1;
+
+	ifr.ifr_flags |= add;
+	ifr.ifr_flags &= ~rem;
+	return ioctl(sock_ioctl, SIOCSIFFLAGS, &ifr);
+}
+
+/* State handed from system_if_clear_entries() to cb_clear_event(). */
+struct clear_data {
+ /* Request message; cb_clear_event() also overwrites it to build delete requests */
+ struct nl_msg *msg;
+ /* Device whose entries are removed; cb_clear_event() uses ifindex 0 when NULL */
+ struct device *dev;
+ /* Request type being dumped (RTM_GETADDR, RTM_GETROUTE or RTM_GETRULE) */
+ int type;
+ /* Number of bytes of the family header appended to the request */
+ int size;
+ /* Address family given by the caller */
+ int af;
+};
+
+
+/* Return true if the address message in hdr belongs to interface ifindex. */
+static bool check_ifaddr(struct nlmsghdr *hdr, int ifindex)
+{
+	struct ifaddrmsg *ifa = NLMSG_DATA(hdr);
+	bool same_if = (ifa->ifa_index == ifindex);
+
+	return same_if;
+}
+
+/*
+ * Return true if the route message in hdr should be removed for interface
+ * ifindex, i.e. its RTA_OIF attribute equals ifindex.
+ *
+ * IPv6 routes with protocol RTPROT_KERNEL are never matched, nor are
+ * messages that cannot be parsed or carry no RTA_OIF.
+ */
+static bool check_route(struct nlmsghdr *hdr, int ifindex)
+{
+	struct rtmsg *r = NLMSG_DATA(hdr);
+	struct nlattr *tb[__RTA_MAX];
+
+	if (r->rtm_protocol == RTPROT_KERNEL &&
+	    r->rtm_family == AF_INET6)
+		return false;
+
+	/* tb[] is only valid when parsing succeeded */
+	if (nlmsg_parse(hdr, sizeof(struct rtmsg), tb, __RTA_MAX - 1, NULL) < 0)
+		return false;
+
+	if (!tb[RTA_OIF])
+		return false;
+
+	return *(int *)RTA_DATA(tb[RTA_OIF]) == ifindex;
+}
+
+/* Rule filter for cb_clear_event(): accepts every message unconditionally. */
+static bool check_rule(struct nlmsghdr *hdr, int ifindex)
+{
+	bool matches = true;
+
+	return matches;
+}
+
+/*
+ * Netlink callback for the dump requested by system_if_clear_entries().
+ *
+ * arg is the struct clear_data of the running dump. For each dumped address,
+ * route or rule accepted by the matching check_*() filter, the message is
+ * copied into clr->msg, turned into the corresponding RTM_DEL* request and
+ * sent on sock_rtnl with automatic acknowledgements disabled for the send.
+ *
+ * Always returns NL_SKIP.
+ */
+static int cb_clear_event(struct nl_msg *msg, void *arg)
+{
+	struct clear_data *clr = arg;
+	struct nlmsghdr *hdr = nlmsg_hdr(msg);
+	bool (*cb)(struct nlmsghdr *, int ifindex);
+	int type;
+
+	/* Select the delete request and the filter matching the dump type */
+	switch(clr->type) {
+	case RTM_GETADDR:
+		type = RTM_DELADDR;
+		if (hdr->nlmsg_type != RTM_NEWADDR)
+			return NL_SKIP;
+
+		cb = check_ifaddr;
+		break;
+	case RTM_GETROUTE:
+		type = RTM_DELROUTE;
+		if (hdr->nlmsg_type != RTM_NEWROUTE)
+			return NL_SKIP;
+
+		cb = check_route;
+		break;
+	case RTM_GETRULE:
+		type = RTM_DELRULE;
+		if (hdr->nlmsg_type != RTM_NEWRULE)
+			return NL_SKIP;
+
+		cb = check_rule;
+		break;
+	default:
+		return NL_SKIP;
+	}
+
+	if (!cb(hdr, clr->dev ? clr->dev->ifindex : 0))
+		return NL_SKIP;
+
+	if (type == RTM_DELRULE)
+		D(SYSTEM, "Remove a rule\n");
+	else
+		/* dev may be NULL (see the filter call above): do not dereference blindly */
+		D(SYSTEM, "Remove %s from device %s\n",
+		  type == RTM_DELADDR ? "an address" : "a route",
+		  clr->dev ? clr->dev->ifname : "(none)");
+
+	/*
+	 * NOTE(review): the copy assumes the dumped message fits into the
+	 * buffer allocated for clr->msg; the size is not checked here.
+	 */
+	memcpy(nlmsg_hdr(clr->msg), hdr, hdr->nlmsg_len);
+	hdr = nlmsg_hdr(clr->msg);
+	hdr->nlmsg_type = type;
+	hdr->nlmsg_flags = NLM_F_REQUEST;
+
+	nl_socket_disable_auto_ack(sock_rtnl);
+	nl_send_auto_complete(sock_rtnl, clr->msg);
+	nl_socket_enable_auto_ack(sock_rtnl);
+
+	return NL_SKIP;
+}
+
+/*
+ * Netlink "finish" callback: clears the int that arg points to (the pending
+ * flag of the waiting caller) and returns NL_STOP.
+ */
+static int
+cb_finish_event(struct nl_msg *msg, void *arg)
+{
+	int *pending_flag = arg;
+
+	*pending_flag = 0;
+
+	return NL_STOP;
+}
+
+/*
+ * Netlink error callback: stores err->error in the int that arg points to
+ * (the pending flag of the waiting caller) and returns NL_STOP.
+ */
+static int
+error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, void *arg)
+{
+	int *pending_flag = arg;
+
+	*pending_flag = err->error;
+
+	return NL_STOP;
+}
+
+/*
+ * Dump all entries of the given kind and delete the matching ones.
+ *
+ * type selects what is dumped (RTM_GETADDR, RTM_GETROUTE or RTM_GETRULE; any
+ * other value is a no-op), af is the address family placed in the request.
+ * Each dumped entry is passed to cb_clear_event(), which issues the delete
+ * request for entries matching dev. The function keeps receiving until the
+ * dump finishes or an error is reported.
+ */
+static void
+system_if_clear_entries(struct device *dev, int type, int af)
+{
+	struct clear_data clr;
+	struct nl_cb *cb;
+	struct rtmsg rtm = {
+		.rtm_family = af,
+		.rtm_flags = RTM_F_CLONED,
+	};
+	int flags = NLM_F_DUMP;
+	int pending = 1;
+
+	clr.af = af;
+	clr.dev = dev;
+	clr.type = type;
+	switch (type) {
+	case RTM_GETADDR:
+	case RTM_GETRULE:
+		/* Only the leading rtgenmsg-sized part of rtm is sent */
+		clr.size = sizeof(struct rtgenmsg);
+		break;
+	case RTM_GETROUTE:
+		clr.size = sizeof(struct rtmsg);
+		break;
+	default:
+		return;
+	}
+
+	/* Allocate only after the type check so the early return cannot leak */
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!cb)
+		return;
+
+	clr.msg = nlmsg_alloc_simple(type, flags);
+	if (!clr.msg)
+		goto out;
+
+	nlmsg_append(clr.msg, &rtm, clr.size, 0);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, cb_clear_event, &clr);
+	nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, cb_finish_event, &pending);
+	nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &pending);
+
+	/* If the request never went out, no reply would ever end the wait */
+	if (nl_send_auto_complete(sock_rtnl, clr.msg) < 0)
+		goto free_msg;
+
+	/* pending is cleared by cb_finish_event() or set by error_handler() */
+	while (pending > 0)
+		nl_recvmsgs(sock_rtnl, cb);
+
+free_msg:
+	nlmsg_free(clr.msg);
+out:
+	nl_cb_put(cb);
+}
+
+/*
+ * Reset a device to a clean state.
+ *
+ * Refreshes dev's ifindex, then (unless the device is external or does not
+ * exist) brings it down. A device that is itself a bridge is deleted and
+ * nothing else is done. Otherwise the device is removed from the bridge it
+ * belongs to, its IPv4 and IPv6 routes and addresses are cleared, and "0" is
+ * written to its disable_ipv6 sysctl.
+ */
+void system_if_clear_state(struct device *dev)
+{
+	static const int families[] = { AF_INET, AF_INET6 };
+	static char buf[256];
+	unsigned int i;
+	char *bridge;
+
+	device_set_ifindex(dev, system_if_resolve(dev));
+	if (dev->external || !dev->ifindex)
+		return;
+
+	system_if_flags(dev->ifname, 0, IFF_UP);
+
+	if (system_is_bridge(dev->ifname, buf, sizeof(buf))) {
+		D(SYSTEM, "Delete existing bridge named '%s'\n", dev->ifname);
+		system_bridge_delbr(dev);
+		return;
+	}
+
+	bridge = system_get_bridge(dev->ifname, buf, sizeof(buf));
+	if (bridge) {
+		D(SYSTEM, "Remove device '%s' from bridge '%s'\n", dev->ifname, bridge);
+		system_bridge_if(bridge, dev, SIOCBRDELIF, NULL);
+	}
+
+	/* Per family: routes first, then addresses */
+	for (i = 0; i < sizeof(families) / sizeof(families[0]); i++) {
+		system_if_clear_entries(dev, RTM_GETROUTE, families[i]);
+		system_if_clear_entries(dev, RTM_GETADDR, families[i]);
+	}
+
+	system_set_disable_ipv6(dev, "0");
+}
+
+/*
+ * Convert a number of seconds into ticks by multiplying with 100, i.e.
+ * assuming 100 ticks per second. val is converted to unsigned long first.
+ */
+static inline unsigned long
+sec_to_jiffies(int val)
+{
+	unsigned long ticks = (unsigned long) val;
+
+	ticks *= 100;
+
+	return ticks;
+}
+
+/*
+ * Send one SIOCDEVPRIVATE bridge request to the bridge named ifname: op is
+ * the BRCTL_* operation, value its single argument. The result is ignored.
+ */
+static void system_bridge_ioctl_set(const char *ifname, unsigned long op, unsigned long value)
+{
+	unsigned long args[4] = { op, value };
+
+	system_bridge_if(ifname, NULL, SIOCDEVPRIVATE, &args);
+}
+
+/*
+ * Create the bridge named bridge->ifname and apply the settings from cfg.
+ *
+ * STP state, forward delay, multicast snooping and priority are always
+ * applied; ageing time, hello time and max age only when the corresponding
+ * BRIDGE_OPT_* bit is set in cfg->flags. Failures while applying settings
+ * are not reported.
+ *
+ * Returns -1 if the bridge cannot be created, 0 otherwise.
+ */
+int system_bridge_addbr(struct device *bridge, struct bridge_config *cfg)
+{
+	const char *name = bridge->ifname;
+
+	if (ioctl(sock_ioctl, SIOCBRADDBR, name) < 0)
+		return -1;
+
+	system_bridge_ioctl_set(name, BRCTL_SET_BRIDGE_STP_STATE, !!cfg->stp);
+	system_bridge_ioctl_set(name, BRCTL_SET_BRIDGE_FORWARD_DELAY,
+				sec_to_jiffies(cfg->forward_delay));
+
+	system_set_dev_sysctl("/sys/devices/virtual/net/%s/bridge/multicast_snooping",
+			      name, cfg->igmp_snoop ? "1" : "0");
+
+	system_bridge_ioctl_set(name, BRCTL_SET_BRIDGE_PRIORITY, cfg->priority);
+
+	if (cfg->flags & BRIDGE_OPT_AGEING_TIME)
+		system_bridge_ioctl_set(name, BRCTL_SET_AGEING_TIME,
+					sec_to_jiffies(cfg->ageing_time));
+
+	if (cfg->flags & BRIDGE_OPT_HELLO_TIME)
+		system_bridge_ioctl_set(name, BRCTL_SET_BRIDGE_HELLO_TIME,
+					sec_to_jiffies(cfg->hello_time));
+
+	if (cfg->flags & BRIDGE_OPT_MAX_AGE)
+		system_bridge_ioctl_set(name, BRCTL_SET_BRIDGE_MAX_AGE,
+					sec_to_jiffies(cfg->max_age));
+
+	return 0;
+}
+
+/*
+ * Create the macvlan interface macvlan->ifname on top of dev.
+ *
+ * The MAC address from cfg is used when MACVLAN_OPT_MACADDR is set in
+ * cfg->flags. cfg->mode, if set, is matched against "private", "vepa",
+ * "bridge" and "passthru"; an unknown mode string adds no mode attribute.
+ *
+ * Returns 0 on success, -ENOENT if dev has no interface index, -1 if the
+ * message cannot be allocated, -ENOMEM if the request cannot be assembled,
+ * or the error from system_rtnl_call().
+ */
+int system_macvlan_add(struct device *macvlan, struct device *dev, struct macvlan_config *cfg)
+{
+	struct nl_msg *msg;
+	struct nlattr *linkinfo, *data;
+	struct ifinfomsg iim = { .ifi_family = AF_INET };
+	int ifindex = system_if_resolve(dev);
+	size_t i;
+	int rv;
+	static const struct {
+		const char *name;
+		enum macvlan_mode val;
+	} modes[] = {
+		{ "private", MACVLAN_MODE_PRIVATE },
+		{ "vepa", MACVLAN_MODE_VEPA },
+		{ "bridge", MACVLAN_MODE_BRIDGE },
+		{ "passthru", MACVLAN_MODE_PASSTHRU },
+	};
+
+	if (ifindex == 0)
+		return -ENOENT;
+
+	msg = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
+	if (!msg)
+		return -1;
+
+	/* Never send a request that is missing part of its payload */
+	if (nlmsg_append(msg, &iim, sizeof(iim), 0) < 0)
+		goto nla_put_failure;
+
+	if ((cfg->flags & MACVLAN_OPT_MACADDR) &&
+	    nla_put(msg, IFLA_ADDRESS, sizeof(cfg->macaddr), cfg->macaddr) < 0)
+		goto nla_put_failure;
+
+	if (nla_put(msg, IFLA_IFNAME, IFNAMSIZ, macvlan->ifname) < 0 ||
+	    nla_put_u32(msg, IFLA_LINK, ifindex) < 0)
+		goto nla_put_failure;
+
+	if (!(linkinfo = nla_nest_start(msg, IFLA_LINKINFO)))
+		goto nla_put_failure;
+
+	if (nla_put(msg, IFLA_INFO_KIND, strlen("macvlan"), "macvlan") < 0)
+		goto nla_put_failure;
+
+	if (!(data = nla_nest_start(msg, IFLA_INFO_DATA)))
+		goto nla_put_failure;
+
+	if (cfg->mode) {
+		for (i = 0; i < ARRAY_SIZE(modes); i++) {
+			if (strcmp(cfg->mode, modes[i].name) != 0)
+				continue;
+
+			if (nla_put_u32(msg, IFLA_MACVLAN_MODE, modes[i].val) < 0)
+				goto nla_put_failure;
+			break;
+		}
+	}
+
+	nla_nest_end(msg, data);
+	nla_nest_end(msg, linkinfo);
+
+	/* system_rtnl_call() frees msg */
+	rv = system_rtnl_call(msg);
+	if (rv)
+		D(SYSTEM, "Error adding macvlan '%s' over '%s': %d\n", macvlan->ifname, dev->ifname, rv);
+
+	return rv;
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOMEM;
+}
+
+/*
+ * Delete the interface named macvlan->ifname with an RTM_DELLINK request.
+ *
+ * Returns -1 if the message cannot be allocated and 0 otherwise; the result
+ * of the netlink call itself is not reported to the caller.
+ */
+int system_macvlan_del(struct device *macvlan)
+{
+	struct nl_msg *msg;
+	/* Fully initialised so no stack garbage is sent; ifi_index stays 0 */
+	struct ifinfomsg iim = { .ifi_family = AF_INET };
+
+	msg = nlmsg_alloc_simple(RTM_DELLINK, 0);
+
+	if (!msg)
+		return -1;
+
+	nlmsg_append(msg, &iim, sizeof(iim), 0);
+
+	/*
+	 * The link is identified by name only. IFLA_INFO_KIND is not added:
+	 * it is only meaningful nested inside IFLA_LINKINFO, and at top level
+	 * its value (1) is that of IFLA_ADDRESS.
+	 * The name is sent with its terminator and without padding bytes.
+	 */
+	nla_put(msg, IFLA_IFNAME, strlen(macvlan->ifname) + 1, macvlan->ifname);
+
+	/* system_rtnl_call() frees msg */
+	system_rtnl_call(msg);
+
+	return 0;
+}
+
+/*
+ * Add or delete a VLAN through the SIOCSIFVLAN ioctl.
+ *
+ * The VLAN name type is first set to VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD
+ * (result ignored). Then, for id >= 0, VLAN id is added on dev->ifname; for
+ * a negative id, a DEL_VLAN_CMD is issued for dev->ifname.
+ *
+ * Returns the result of the second ioctl.
+ */
+static int system_vlan(struct device *dev, int id)
+{
+	struct vlan_ioctl_args ifr = {
+		.cmd = SET_VLAN_NAME_TYPE_CMD,
+		.u.name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD,
+	};
+	bool deleting = id < 0;
+
+	ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
+
+	ifr.cmd = deleting ? DEL_VLAN_CMD : ADD_VLAN_CMD;
+	ifr.u.VID = deleting ? 0 : id;
+
+	strncpy(ifr.device1, dev->ifname, sizeof(ifr.device1));
+	return ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
+}
+
+/* Add VLAN id on dev; returns the result of system_vlan(). */
+int system_vlan_add(struct device *dev, int id)
+{
+	int ret = system_vlan(dev, id);
+
+	return ret;
+}
+
+/* Delete the VLAN device dev; a negative id makes system_vlan() issue the delete. */
+int system_vlan_del(struct device *dev)
+{
+	const int delete_id = -1;
+
+	return system_vlan(dev, delete_id);
+}
+
+static void
+system_if_get_settings(struct device *dev, struct device_settings *s)
+{
+ struct ifreq ifr;
+ char buf[10];
+
+ memset(&ifr, 0, sizeof(ifr));
+ strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
+
+ if (ioctl(sock_ioctl, SIOCGIFMTU, &ifr) == 0) {
+ s->mtu = ifr.ifr_mtu;
+ s->flags |= DEV_OPT_MTU;
+ }
+
+ if (ioctl(sock_ioctl, SIOCGIFTXQLEN, &ifr) == 0) {
+ s->txqueuelen = ifr.ifr_qlen;
+ s->flags |= DEV_OPT_TXQUEUELEN;
+ }
+
+ if (ioctl(sock_ioctl, SIOCGIFHWADDR, &ifr) == 0) {
+ memcpy(s->macaddr, &ifr.ifr_hwaddr.sa_data, sizeof(s->macaddr));
+ s->flags |= DEV_OPT_MACADDR;
+ }