add prelocal table to manipulate locally destined traffic
[project/netifd.git] / system-linux.c
1 /*
2  * netifd - network interface daemon
3  * Copyright (C) 2012 Felix Fietkau <nbd@openwrt.org>
4  * Copyright (C) 2013 Jo-Philipp Wich <jow@openwrt.org>
5  * Copyright (C) 2013 Steven Barth <steven@midlink.org>
6  * Copyright (C) 2014 Gioacchino Mazzurco <gio@eigenlab.org>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  */
17 #define _GNU_SOURCE
18
19 #include <sys/socket.h>
20 #include <sys/ioctl.h>
21 #include <sys/stat.h>
22 #include <sys/syscall.h>
23
24 #include <net/if.h>
25 #include <net/if_arp.h>
26
27 #include <arpa/inet.h>
28 #include <netinet/in.h>
29
30 #include <linux/rtnetlink.h>
31 #include <linux/sockios.h>
32 #include <linux/ip.h>
33 #include <linux/if_link.h>
34 #include <linux/if_vlan.h>
35 #include <linux/if_bridge.h>
36 #include <linux/if_tunnel.h>
37 #include <linux/ip6_tunnel.h>
38 #include <linux/ethtool.h>
39 #include <linux/fib_rules.h>
40 #include <linux/version.h>
41
/*
 * Fallback value for the RTN_FAILED_POLICY route type; only defined here
 * when the headers included above do not already provide it.
 */
#ifndef RTN_FAILED_POLICY
#define RTN_FAILED_POLICY 12
#endif

/*
 * Fallback identifier for the "prelocal" routing table; only defined here
 * when the headers included above do not already provide it.
 */
#ifndef RT_TABLE_PRELOCAL
#define RT_TABLE_PRELOCAL 128
#endif
49
50 #include <string.h>
51 #include <fcntl.h>
52 #include <glob.h>
53 #include <time.h>
54
55 #include <netlink/msg.h>
56 #include <netlink/attr.h>
57 #include <netlink/socket.h>
58 #include <libubox/uloop.h>
59
60 #include "netifd.h"
61 #include "device.h"
62 #include "system.h"
63
/* A netlink socket that is registered with the uloop event loop. */
struct event_socket {
	/* uloop descriptor; handlers recover the event_socket from it via container_of() */
	struct uloop_fd uloop;
	/* underlying libnl socket */
	struct nl_sock *sock;
	/* current receive buffer size; doubled by handler_nl_event() on ENOBUFS */
	int bufsize;
};
69
/* AF_LOCAL datagram socket opened by system_init(); used as the fd for ioctl() requests */
static int sock_ioctl = -1;
/* NETLINK_ROUTE socket used for synchronous rtnetlink requests */
static struct nl_sock *sock_rtnl = NULL;

/* Forward declarations of the event handlers installed by system_init() */
static int cb_rtnl_event(struct nl_msg *msg, void *arg);
static void handle_hotplug_event(struct uloop_fd *u, unsigned int events);

/* Shared scratch buffer for formatting sysfs/procfs paths; overwritten by each helper that uses it */
static char dev_buf[256];
77
78 static void
79 handler_nl_event(struct uloop_fd *u, unsigned int events)
80 {
81         struct event_socket *ev = container_of(u, struct event_socket, uloop);
82         int err;
83         socklen_t errlen = sizeof(err);
84
85         if (!u->error) {
86                 nl_recvmsgs_default(ev->sock);
87                 return;
88         }
89
90         if (getsockopt(u->fd, SOL_SOCKET, SO_ERROR, (void *)&err, &errlen))
91                 goto abort;
92
93         switch(err) {
94         case ENOBUFS:
95                 // Increase rx buffer size on netlink socket
96                 ev->bufsize *= 2;
97                 if (nl_socket_set_buffer_size(ev->sock, ev->bufsize, 0))
98                         goto abort;
99
100                 // Request full dump since some info got dropped
101                 struct rtgenmsg msg = { .rtgen_family = AF_UNSPEC };
102                 nl_send_simple(ev->sock, RTM_GETLINK, NLM_F_DUMP, &msg, sizeof(msg));
103                 break;
104
105         default:
106                 goto abort;
107         }
108         u->error = false;
109         return;
110
111 abort:
112         uloop_fd_delete(&ev->uloop);
113         return;
114 }
115
/*
 * Allocate a netlink socket, optionally join the multicast @groups bitmask,
 * and connect it to @protocol.
 *
 * Returns the connected socket, or NULL on failure. The socket is released
 * again when the connect step fails, so nothing is leaked on error.
 */
static struct nl_sock *
create_socket(int protocol, int groups)
{
	struct nl_sock *sock;

	sock = nl_socket_alloc();
	if (!sock)
		return NULL;

	if (groups)
		nl_join_groups(sock, groups);

	if (nl_connect(sock, protocol)) {
		/* Do not leak the allocated socket when connecting fails */
		nl_socket_free(sock);
		return NULL;
	}

	return sock;
}
133
134 static bool
135 create_raw_event_socket(struct event_socket *ev, int protocol, int groups,
136                         uloop_fd_handler cb, int flags)
137 {
138         ev->sock = create_socket(protocol, groups);
139         if (!ev->sock)
140                 return false;
141
142         ev->uloop.fd = nl_socket_get_fd(ev->sock);
143         ev->uloop.cb = cb;
144         if (uloop_fd_add(&ev->uloop, ULOOP_READ|flags))
145                 return false;
146
147         return true;
148 }
149
150 static bool
151 create_event_socket(struct event_socket *ev, int protocol,
152                     int (*cb)(struct nl_msg *msg, void *arg))
153 {
154         if (!create_raw_event_socket(ev, protocol, 0, handler_nl_event, ULOOP_ERROR_CB))
155                 return false;
156
157         // Install the valid custom callback handler
158         nl_socket_modify_cb(ev->sock, NL_CB_VALID, NL_CB_CUSTOM, cb, NULL);
159
160         // Disable sequence number checking on event sockets
161         nl_socket_disable_seq_check(ev->sock);
162
163         // Increase rx buffer size to 65K on event sockets
164         ev->bufsize = 65535;
165         if (nl_socket_set_buffer_size(ev->sock, ev->bufsize, 0))
166                 return false;
167
168         return true;
169 }
170
/*
 * Convert a route type given as symbolic name ("local", "unicast", ...) or
 * as a number in any strtoul() base-0 notation into its RTN_* value.
 *
 * @src: NUL-terminated input string
 * @dst: receives the route type on success; left untouched on failure
 *
 * Returns true on success, false if @src is neither a known name nor a
 * number in the range 0..255.
 */
static bool
system_rtn_aton(const char *src, unsigned int *dst)
{
	static const struct {
		const char *name;
		unsigned int type;
	} rtn_names[] = {
		{ "local", RTN_LOCAL },
		{ "nat", RTN_NAT },
		{ "broadcast", RTN_BROADCAST },
		{ "anycast", RTN_ANYCAST },
		{ "multicast", RTN_MULTICAST },
		{ "prohibit", RTN_PROHIBIT },
		{ "unreachable", RTN_UNREACHABLE },
		{ "blackhole", RTN_BLACKHOLE },
		{ "xresolve", RTN_XRESOLVE },
		{ "unicast", RTN_UNICAST },
		{ "throw", RTN_THROW },
		{ "failed_policy", RTN_FAILED_POLICY },
	};
	unsigned long val;
	char *end;
	size_t i;

	for (i = 0; i < sizeof(rtn_names) / sizeof(rtn_names[0]); i++) {
		if (!strcmp(src, rtn_names[i].name)) {
			*dst = rtn_names[i].type;
			return true;
		}
	}

	/*
	 * Range-check the full unsigned long result; narrowing to unsigned int
	 * first would let values such as 2^32 + 1 slip through as 1.
	 */
	val = strtoul(src, &end, 0);
	if (end == src || *end || val > 255)
		return false;

	*dst = val;
	return true;
}
210
/*
 * Parse a TOS value written in hexadecimal (with or without "0x" prefix).
 *
 * @src: NUL-terminated input string
 * @dst: receives the value on success; left untouched on failure
 *
 * Returns true if @src is a complete hex number in the range 0..0xff.
 */
static bool
system_tos_aton(const char *src, unsigned *dst)
{
	unsigned long val;
	char *end;

	/*
	 * Keep the full-width result for the range check; storing it into an
	 * unsigned int first would truncate e.g. 0x100000010 to 0x10.
	 */
	val = strtoul(src, &end, 16);
	if (end == src || *end || val > 255)
		return false;

	*dst = val;
	return true;
}
222
223 int system_init(void)
224 {
225         static struct event_socket rtnl_event;
226         static struct event_socket hotplug_event;
227
228         sock_ioctl = socket(AF_LOCAL, SOCK_DGRAM, 0);
229         system_fd_set_cloexec(sock_ioctl);
230
231         // Prepare socket for routing / address control
232         sock_rtnl = create_socket(NETLINK_ROUTE, 0);
233         if (!sock_rtnl)
234                 return -1;
235
236         if (!create_event_socket(&rtnl_event, NETLINK_ROUTE, cb_rtnl_event))
237                 return -1;
238
239         if (!create_raw_event_socket(&hotplug_event, NETLINK_KOBJECT_UEVENT, 1,
240                                      handle_hotplug_event, 0))
241                 return -1;
242
243         // Receive network link events form kernel
244         nl_socket_add_membership(rtnl_event.sock, RTNLGRP_LINK);
245
246         return 0;
247 }
248
/*
 * Write the string @val to the file at @path. This is best effort: nothing
 * is reported if the file cannot be opened or the write fails.
 */
static void system_set_sysctl(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd >= 0) {
		/* The write result is deliberately ignored */
		if (write(fd, val, strlen(val))) {}
		close(fd);
	}
}
260
261 static void system_set_dev_sysctl(const char *path, const char *device, const char *val)
262 {
263         snprintf(dev_buf, sizeof(dev_buf), path, device);
264         system_set_sysctl(dev_buf, val);
265 }
266
267 static void system_set_disable_ipv6(struct device *dev, const char *val)
268 {
269         system_set_dev_sysctl("/proc/sys/net/ipv6/conf/%s/disable_ipv6", dev->ifname, val);
270 }
271
272 static void system_set_rpfilter(struct device *dev, const char *val)
273 {
274         system_set_dev_sysctl("/proc/sys/net/ipv4/conf/%s/rp_filter", dev->ifname, val);
275 }
276
277 static void system_set_acceptlocal(struct device *dev, const char *val)
278 {
279         system_set_dev_sysctl("/proc/sys/net/ipv4/conf/%s/accept_local", dev->ifname, val);
280 }
281
282 static void system_set_igmpversion(struct device *dev, const char *val)
283 {
284         system_set_dev_sysctl("/proc/sys/net/ipv4/conf/%s/force_igmp_version", dev->ifname, val);
285 }
286
287 static void system_set_mldversion(struct device *dev, const char *val)
288 {
289         system_set_dev_sysctl("/proc/sys/net/ipv6/conf/%s/force_mld_version", dev->ifname, val);
290 }
291
292 static void system_set_neigh4reachabletime(struct device *dev, const char *val)
293 {
294         system_set_dev_sysctl("/proc/sys/net/ipv4/neigh/%s/base_reachable_time_ms", dev->ifname, val);
295 }
296
297 static void system_set_neigh6reachabletime(struct device *dev, const char *val)
298 {
299         system_set_dev_sysctl("/proc/sys/net/ipv6/neigh/%s/base_reachable_time_ms", dev->ifname, val);
300 }
301
/*
 * Read the beginning of the file at @path into @buf as a NUL-terminated
 * string. At most @buf_sz - 1 bytes are read.
 *
 * Returns 0 on success, or -1 if @buf_sz is 0, the file cannot be opened,
 * or reading fails. @buf is not modified when -1 is returned because of an
 * empty buffer or an open failure.
 */
static int system_get_sysctl(const char *path, char *buf, const size_t buf_sz)
{
	ssize_t len;
	int fd;

	/* Need room for the terminator; also keeps buf_sz - 1 from wrapping */
	if (!buf_sz)
		return -1;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	len = read(fd, buf, buf_sz - 1);
	close(fd);
	if (len < 0)
		return -1;

	buf[len] = 0;
	return 0;
}
322
323 static int
324 system_get_dev_sysctl(const char *path, const char *device, char *buf, const size_t buf_sz)
325 {
326         snprintf(dev_buf, sizeof(dev_buf), path, device);
327         return system_get_sysctl(dev_buf, buf, buf_sz);
328 }
329
330 static int system_get_disable_ipv6(struct device *dev, char *buf, const size_t buf_sz)
331 {
332         return system_get_dev_sysctl("/proc/sys/net/ipv6/conf/%s/disable_ipv6",
333                         dev->ifname, buf, buf_sz);
334 }
335
336 static int system_get_rpfilter(struct device *dev, char *buf, const size_t buf_sz)
337 {
338         return system_get_dev_sysctl("/proc/sys/net/ipv4/conf/%s/rp_filter",
339                         dev->ifname, buf, buf_sz);
340 }
341
342 static int system_get_acceptlocal(struct device *dev, char *buf, const size_t buf_sz)
343 {
344         return system_get_dev_sysctl("/proc/sys/net/ipv4/conf/%s/accept_local",
345                         dev->ifname, buf, buf_sz);
346 }
347
348 static int system_get_igmpversion(struct device *dev, char *buf, const size_t buf_sz)
349 {
350         return system_get_dev_sysctl("/proc/sys/net/ipv4/conf/%s/force_igmp_version",
351                         dev->ifname, buf, buf_sz);
352 }
353
354 static int system_get_mldversion(struct device *dev, char *buf, const size_t buf_sz)
355 {
356         return system_get_dev_sysctl("/proc/sys/net/ipv6/conf/%s/force_mld_version",
357                         dev->ifname, buf, buf_sz);
358 }
359
360 static int system_get_neigh4reachabletime(struct device *dev, char *buf, const size_t buf_sz)
361 {
362         return system_get_dev_sysctl("/proc/sys/net/ipv4/neigh/%s/base_reachable_time_ms",
363                         dev->ifname, buf, buf_sz);
364 }
365
366 static int system_get_neigh6reachabletime(struct device *dev, char *buf, const size_t buf_sz)
367 {
368         return system_get_dev_sysctl("/proc/sys/net/ipv6/neigh/%s/base_reachable_time_ms",
369                         dev->ifname, buf, buf_sz);
370 }
371
372 // Evaluate netlink messages
373 static int cb_rtnl_event(struct nl_msg *msg, void *arg)
374 {
375         struct nlmsghdr *nh = nlmsg_hdr(msg);
376         struct ifinfomsg *ifi = NLMSG_DATA(nh);
377         struct nlattr *nla[__IFLA_MAX];
378         int link_state = 0;
379         char buf[10];
380
381         if (nh->nlmsg_type != RTM_NEWLINK)
382                 goto out;
383
384         nlmsg_parse(nh, sizeof(*ifi), nla, __IFLA_MAX - 1, NULL);
385         if (!nla[IFLA_IFNAME])
386                 goto out;
387
388         struct device *dev = device_get(nla_data(nla[IFLA_IFNAME]), false);
389         if (!dev || dev->type->keep_link_status)
390                 goto out;
391
392         if (!system_get_dev_sysctl("/sys/class/net/%s/carrier", dev->ifname, buf, sizeof(buf)))
393                 link_state = strtoul(buf, NULL, 0);
394
395         device_set_link(dev, link_state ? true : false);
396
397 out:
398         return 0;
399 }
400
401 static void
402 handle_hotplug_msg(char *data, int size)
403 {
404         const char *subsystem = NULL, *interface = NULL;
405         char *cur, *end, *sep;
406         struct device *dev;
407         int skip;
408         bool add;
409
410         if (!strncmp(data, "add@", 4))
411                 add = true;
412         else if (!strncmp(data, "remove@", 7))
413                 add = false;
414         else
415                 return;
416
417         skip = strlen(data) + 1;
418         end = data + size;
419
420         for (cur = data + skip; cur < end; cur += skip) {
421                 skip = strlen(cur) + 1;
422
423                 sep = strchr(cur, '=');
424                 if (!sep)
425                         continue;
426
427                 *sep = 0;
428                 if (!strcmp(cur, "INTERFACE"))
429                         interface = sep + 1;
430                 else if (!strcmp(cur, "SUBSYSTEM")) {
431                         subsystem = sep + 1;
432                         if (strcmp(subsystem, "net") != 0)
433                                 return;
434                 }
435                 if (subsystem && interface)
436                         goto found;
437         }
438         return;
439
440 found:
441         dev = device_get(interface, false);
442         if (!dev)
443                 return;
444
445         if (dev->type != &simple_device_type)
446                 return;
447
448         if (add && system_if_force_external(dev->ifname))
449                 return;
450
451         device_set_present(dev, add);
452 }
453
454 static void
455 handle_hotplug_event(struct uloop_fd *u, unsigned int events)
456 {
457         struct event_socket *ev = container_of(u, struct event_socket, uloop);
458         struct sockaddr_nl nla;
459         unsigned char *buf = NULL;
460         int size;
461
462         while ((size = nl_recv(ev->sock, &nla, &buf, NULL)) > 0) {
463                 if (nla.nl_pid == 0)
464                         handle_hotplug_msg((char *) buf, size);
465
466                 free(buf);
467         }
468 }
469
470 static int system_rtnl_call(struct nl_msg *msg)
471 {
472         int ret;
473
474         ret = nl_send_auto_complete(sock_rtnl, msg);
475         nlmsg_free(msg);
476
477         if (ret < 0)
478                 return ret;
479
480         return nl_wait_for_ack(sock_rtnl);
481 }
482
483 int system_bridge_delbr(struct device *bridge)
484 {
485         return ioctl(sock_ioctl, SIOCBRDELBR, bridge->ifname);
486 }
487
488 static int system_bridge_if(const char *bridge, struct device *dev, int cmd, void *data)
489 {
490         struct ifreq ifr;
491
492         memset(&ifr, 0, sizeof(ifr));
493         if (dev)
494                 ifr.ifr_ifindex = dev->ifindex;
495         else
496                 ifr.ifr_data = data;
497         strncpy(ifr.ifr_name, bridge, sizeof(ifr.ifr_name));
498         return ioctl(sock_ioctl, cmd, &ifr);
499 }
500
/*
 * Report whether /sys/devices/virtual/net/<name>/bridge exists.
 * @buf (of @buflen bytes) is used as scratch space for the path.
 */
static bool system_is_bridge(const char *name, char *buf, int buflen)
{
	struct stat info;

	snprintf(buf, buflen, "/sys/devices/virtual/net/%s/bridge", name);

	return stat(buf, &info) >= 0;
}
511
/*
 * Find the bridge that the interface @name is currently a member of.
 *
 * The sysfs "bridge" symlink of the member port is located with glob() and
 * resolved with readlink(); the last path component of the link target is
 * the bridge name.
 *
 * @buf / @buflen: scratch buffer; the returned pointer points into it.
 *
 * Returns the bridge name, or NULL if the interface is not in a bridge or
 * the lookup fails.
 */
static char *system_get_bridge(const char *name, char *buf, int buflen)
{
	char *path;
	ssize_t len = -1;
	glob_t gl;

	if (buflen < 1)
		return NULL;

	snprintf(buf, buflen, "/sys/devices/virtual/net/*/brif/%s/bridge", name);

	/*
	 * glob() reports failures with non-zero codes, so test for success
	 * explicitly. Leave one byte for the terminator, because readlink()
	 * does not NUL-terminate and may fill the whole buffer.
	 */
	if (glob(buf, GLOB_NOSORT, NULL, &gl) == 0 && gl.gl_pathc > 0)
		len = readlink(gl.gl_pathv[0], buf, buflen - 1);

	globfree(&gl);

	if (len < 0)
		return NULL;

	buf[len] = 0;
	path = strrchr(buf, '/');
	if (!path)
		return NULL;

	return path + 1;
}
537
538 static void system_bridge_set_wireless(const char *bridge, const char *dev)
539 {
540         snprintf(dev_buf, sizeof(dev_buf),
541                  "/sys/devices/virtual/net/%s/brif/%s/multicast_to_unicast",
542                  bridge, dev);
543         system_set_sysctl(dev_buf, "1");
544 }
545
546 int system_bridge_addif(struct device *bridge, struct device *dev)
547 {
548         char *oldbr;
549         int ret = 0;
550
551         oldbr = system_get_bridge(dev->ifname, dev_buf, sizeof(dev_buf));
552         if (!oldbr || strcmp(oldbr, bridge->ifname) != 0)
553                 ret = system_bridge_if(bridge->ifname, dev, SIOCBRADDIF, NULL);
554
555         if (dev->wireless)
556                 system_bridge_set_wireless(bridge->ifname, dev->ifname);
557
558         return ret;
559 }
560
561 int system_bridge_delif(struct device *bridge, struct device *dev)
562 {
563         return system_bridge_if(bridge->ifname, dev, SIOCBRDELIF, NULL);
564 }
565
566 int system_if_resolve(struct device *dev)
567 {
568         struct ifreq ifr;
569         strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
570         if (!ioctl(sock_ioctl, SIOCGIFINDEX, &ifr))
571                 return ifr.ifr_ifindex;
572         else
573                 return 0;
574 }
575
576 static int system_if_flags(const char *ifname, unsigned add, unsigned rem)
577 {
578         struct ifreq ifr;
579
580         memset(&ifr, 0, sizeof(ifr));
581         strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
582         ioctl(sock_ioctl, SIOCGIFFLAGS, &ifr);
583         ifr.ifr_flags |= add;
584         ifr.ifr_flags &= ~rem;
585         return ioctl(sock_ioctl, SIOCSIFFLAGS, &ifr);
586 }
587
/* State shared between system_if_clear_entries() and cb_clear_event(). */
struct clear_data {
	/* dump request message; cb_clear_event() reuses it as buffer for delete requests */
	struct nl_msg *msg;
	/* device whose entries are removed; cb_clear_event() matches against ifindex 0 when NULL */
	struct device *dev;
	/* dump request type: RTM_GETADDR, RTM_GETROUTE or RTM_GETRULE */
	int type;
	/* number of header bytes appended to the dump request */
	int size;
	/* address family of the dump request */
	int af;
};
595
596
/* Return true if the address message in @hdr belongs to interface @ifindex. */
static bool check_ifaddr(struct nlmsghdr *hdr, int ifindex)
{
	const struct ifaddrmsg *addr = NLMSG_DATA(hdr);
	bool same_dev = (addr->ifa_index == ifindex);

	return same_dev;
}
603
604 static bool check_route(struct nlmsghdr *hdr, int ifindex)
605 {
606         struct rtmsg *r = NLMSG_DATA(hdr);
607         struct nlattr *tb[__RTA_MAX];
608
609         if (r->rtm_protocol == RTPROT_KERNEL &&
610             r->rtm_family == AF_INET6)
611                 return false;
612
613         nlmsg_parse(hdr, sizeof(struct rtmsg), tb, __RTA_MAX - 1, NULL);
614         if (!tb[RTA_OIF])
615                 return false;
616
617         return *(int *)RTA_DATA(tb[RTA_OIF]) == ifindex;
618 }
619
/* Rule filter for cb_clear_event(): matches every rule; both arguments are unused. */
static bool check_rule(struct nlmsghdr *hdr, int ifindex)
{
	(void) hdr;
	(void) ifindex;

	return true;
}
624
625 static int cb_clear_event(struct nl_msg *msg, void *arg)
626 {
627         struct clear_data *clr = arg;
628         struct nlmsghdr *hdr = nlmsg_hdr(msg);
629         bool (*cb)(struct nlmsghdr *, int ifindex);
630         int type;
631
632         switch(clr->type) {
633         case RTM_GETADDR:
634                 type = RTM_DELADDR;
635                 if (hdr->nlmsg_type != RTM_NEWADDR)
636                         return NL_SKIP;
637
638                 cb = check_ifaddr;
639                 break;
640         case RTM_GETROUTE:
641                 type = RTM_DELROUTE;
642                 if (hdr->nlmsg_type != RTM_NEWROUTE)
643                         return NL_SKIP;
644
645                 cb = check_route;
646                 break;
647         case RTM_GETRULE:
648                 type = RTM_DELRULE;
649                 if (hdr->nlmsg_type != RTM_NEWRULE)
650                         return NL_SKIP;
651
652                 cb = check_rule;
653                 break;
654         default:
655                 return NL_SKIP;
656         }
657
658         if (!cb(hdr, clr->dev ? clr->dev->ifindex : 0))
659                 return NL_SKIP;
660
661         if (type == RTM_DELRULE)
662                 D(SYSTEM, "Remove a rule\n");
663         else
664                 D(SYSTEM, "Remove %s from device %s\n",
665                   type == RTM_DELADDR ? "an address" : "a route",
666                   clr->dev->ifname);
667         memcpy(nlmsg_hdr(clr->msg), hdr, hdr->nlmsg_len);
668         hdr = nlmsg_hdr(clr->msg);
669         hdr->nlmsg_type = type;
670         hdr->nlmsg_flags = NLM_F_REQUEST;
671
672         nl_socket_disable_auto_ack(sock_rtnl);
673         nl_send_auto_complete(sock_rtnl, clr->msg);
674         nl_socket_enable_auto_ack(sock_rtnl);
675
676         return NL_SKIP;
677 }
678
679 static int
680 cb_finish_event(struct nl_msg *msg, void *arg)
681 {
682         int *pending = arg;
683         *pending = 0;
684         return NL_STOP;
685 }
686
687 static int
688 error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, void *arg)
689 {
690         int *pending = arg;
691         *pending = err->error;
692         return NL_STOP;
693 }
694
695 static void
696 system_if_clear_entries(struct device *dev, int type, int af)
697 {
698         struct clear_data clr;
699         struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
700         struct rtmsg rtm = {
701                 .rtm_family = af,
702                 .rtm_flags = RTM_F_CLONED,
703         };
704         int flags = NLM_F_DUMP;
705         int pending = 1;
706
707         clr.af = af;
708         clr.dev = dev;
709         clr.type = type;
710         switch (type) {
711         case RTM_GETADDR:
712         case RTM_GETRULE:
713                 clr.size = sizeof(struct rtgenmsg);
714                 break;
715         case RTM_GETROUTE:
716                 clr.size = sizeof(struct rtmsg);
717                 break;
718         default:
719                 return;
720         }
721
722         if (!cb)
723                 return;
724
725         clr.msg = nlmsg_alloc_simple(type, flags);
726         if (!clr.msg)
727                 goto out;
728
729         nlmsg_append(clr.msg, &rtm, clr.size, 0);
730         nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, cb_clear_event, &clr);
731         nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, cb_finish_event, &pending);
732         nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &pending);
733
734         nl_send_auto_complete(sock_rtnl, clr.msg);
735         while (pending > 0)
736                 nl_recvmsgs(sock_rtnl, cb);
737
738         nlmsg_free(clr.msg);
739 out:
740         nl_cb_put(cb);
741 }
742
743 /*
744  * Clear bridge (membership) state and bring down device
745  */
746 void system_if_clear_state(struct device *dev)
747 {
748         static char buf[256];
749         char *bridge;
750
751         device_set_ifindex(dev, system_if_resolve(dev));
752         if (dev->external || !dev->ifindex)
753                 return;
754
755         system_if_flags(dev->ifname, 0, IFF_UP);
756
757         if (system_is_bridge(dev->ifname, buf, sizeof(buf))) {
758                 D(SYSTEM, "Delete existing bridge named '%s'\n", dev->ifname);
759                 system_bridge_delbr(dev);
760                 return;
761         }
762
763         bridge = system_get_bridge(dev->ifname, buf, sizeof(buf));
764         if (bridge) {
765                 D(SYSTEM, "Remove device '%s' from bridge '%s'\n", dev->ifname, bridge);
766                 system_bridge_if(bridge, dev, SIOCBRDELIF, NULL);
767         }
768
769         system_if_clear_entries(dev, RTM_GETROUTE, AF_INET);
770         system_if_clear_entries(dev, RTM_GETADDR, AF_INET);
771         system_if_clear_entries(dev, RTM_GETROUTE, AF_INET6);
772         system_if_clear_entries(dev, RTM_GETADDR, AF_INET6);
773         system_set_disable_ipv6(dev, "0");
774 }
775
/* Convert seconds to the bridge ioctl time unit, using a fixed factor of 100 per second. */
static inline unsigned long
sec_to_jiffies(int val)
{
	unsigned long secs = (unsigned long) val;

	return secs * 100;
}
781
782 int system_bridge_addbr(struct device *bridge, struct bridge_config *cfg)
783 {
784         unsigned long args[4] = {};
785
786         if (ioctl(sock_ioctl, SIOCBRADDBR, bridge->ifname) < 0)
787                 return -1;
788
789         args[0] = BRCTL_SET_BRIDGE_STP_STATE;
790         args[1] = !!cfg->stp;
791         system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
792
793         args[0] = BRCTL_SET_BRIDGE_FORWARD_DELAY;
794         args[1] = sec_to_jiffies(cfg->forward_delay);
795         system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
796
797         system_set_dev_sysctl("/sys/devices/virtual/net/%s/bridge/multicast_snooping",
798                 bridge->ifname, cfg->igmp_snoop ? "1" : "0");
799
800         system_set_dev_sysctl("/sys/devices/virtual/net/%s/bridge/multicast_querier",
801                 bridge->ifname, cfg->igmp_snoop ? "1" : "0");
802
803         args[0] = BRCTL_SET_BRIDGE_PRIORITY;
804         args[1] = cfg->priority;
805         system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
806
807         if (cfg->flags & BRIDGE_OPT_AGEING_TIME) {
808                 args[0] = BRCTL_SET_AGEING_TIME;
809                 args[1] = sec_to_jiffies(cfg->ageing_time);
810                 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
811         }
812
813         if (cfg->flags & BRIDGE_OPT_HELLO_TIME) {
814                 args[0] = BRCTL_SET_BRIDGE_HELLO_TIME;
815                 args[1] = sec_to_jiffies(cfg->hello_time);
816                 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
817         }
818
819         if (cfg->flags & BRIDGE_OPT_MAX_AGE) {
820                 args[0] = BRCTL_SET_BRIDGE_MAX_AGE;
821                 args[1] = sec_to_jiffies(cfg->max_age);
822                 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
823         }
824
825         return 0;
826 }
827
828 int system_macvlan_add(struct device *macvlan, struct device *dev, struct macvlan_config *cfg)
829 {
830         struct nl_msg *msg;
831         struct nlattr *linkinfo, *data;
832         struct ifinfomsg iim = { .ifi_family = AF_UNSPEC, };
833         int i, rv;
834         static const struct {
835                 const char *name;
836                 enum macvlan_mode val;
837         } modes[] = {
838                 { "private", MACVLAN_MODE_PRIVATE },
839                 { "vepa", MACVLAN_MODE_VEPA },
840                 { "bridge", MACVLAN_MODE_BRIDGE },
841                 { "passthru", MACVLAN_MODE_PASSTHRU },
842         };
843
844         msg = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
845
846         if (!msg)
847                 return -1;
848
849         nlmsg_append(msg, &iim, sizeof(iim), 0);
850
851         if (cfg->flags & MACVLAN_OPT_MACADDR)
852                 nla_put(msg, IFLA_ADDRESS, sizeof(cfg->macaddr), cfg->macaddr);
853         nla_put_string(msg, IFLA_IFNAME, macvlan->ifname);
854         nla_put_u32(msg, IFLA_LINK, dev->ifindex);
855
856         if (!(linkinfo = nla_nest_start(msg, IFLA_LINKINFO)))
857                 goto nla_put_failure;
858
859         nla_put_string(msg, IFLA_INFO_KIND, "macvlan");
860
861         if (!(data = nla_nest_start(msg, IFLA_INFO_DATA)))
862                 goto nla_put_failure;
863
864         if (cfg->mode) {
865                 for (i = 0; i < ARRAY_SIZE(modes); i++) {
866                         if (strcmp(cfg->mode, modes[i].name) != 0)
867                                 continue;
868
869                         nla_put_u32(msg, IFLA_MACVLAN_MODE, modes[i].val);
870                         break;
871                 }
872         }
873
874         nla_nest_end(msg, data);
875         nla_nest_end(msg, linkinfo);
876
877         rv = system_rtnl_call(msg);
878         if (rv)
879                 D(SYSTEM, "Error adding macvlan '%s' over '%s': %d\n", macvlan->ifname, dev->ifname, rv);
880
881         return rv;
882
883 nla_put_failure:
884         nlmsg_free(msg);
885         return -ENOMEM;
886 }
887
/*
 * Delete the link named @ifname via RTM_DELLINK.
 * Returns -1 if the message cannot be allocated, otherwise the result of
 * the rtnetlink call.
 */
static int system_link_del(const char *ifname)
{
	struct ifinfomsg hdr = {
		.ifi_family = AF_UNSPEC,
		.ifi_index = 0,
	};
	struct nl_msg *req = nlmsg_alloc_simple(RTM_DELLINK, NLM_F_REQUEST);

	if (!req)
		return -1;

	nlmsg_append(req, &hdr, sizeof(hdr), 0);
	nla_put_string(req, IFLA_IFNAME, ifname);

	return system_rtnl_call(req);
}
905
906 int system_macvlan_del(struct device *macvlan)
907 {
908         return system_link_del(macvlan->ifname);
909 }
910
911 static int system_vlan(struct device *dev, int id)
912 {
913         struct vlan_ioctl_args ifr = {
914                 .cmd = SET_VLAN_NAME_TYPE_CMD,
915                 .u.name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD,
916         };
917
918         ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
919
920         if (id < 0) {
921                 ifr.cmd = DEL_VLAN_CMD;
922                 ifr.u.VID = 0;
923         } else {
924                 ifr.cmd = ADD_VLAN_CMD;
925                 ifr.u.VID = id;
926         }
927         strncpy(ifr.device1, dev->ifname, sizeof(ifr.device1));
928         return ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
929 }
930
/* Add VLAN @id on top of @dev; returns system_vlan()'s result. */
int system_vlan_add(struct device *dev, int id)
{
	int ret = system_vlan(dev, id);

	return ret;
}
935
/* Delete the VLAN device @dev; a negative id tells system_vlan() to delete. */
int system_vlan_del(struct device *dev)
{
	const int delete_request = -1;

	return system_vlan(dev, delete_request);
}
940
941 int system_vlandev_add(struct device *vlandev, struct device *dev, struct vlandev_config *cfg)
942 {
943         struct nl_msg *msg;
944         struct nlattr *linkinfo, *data;
945         struct ifinfomsg iim = { .ifi_family = AF_UNSPEC };
946         int rv;
947
948         msg = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
949
950         if (!msg)
951                 return -1;
952
953         nlmsg_append(msg, &iim, sizeof(iim), 0);
954         nla_put_string(msg, IFLA_IFNAME, vlandev->ifname);
955         nla_put_u32(msg, IFLA_LINK, dev->ifindex);
956         
957         if (!(linkinfo = nla_nest_start(msg, IFLA_LINKINFO)))
958                 goto nla_put_failure;
959         
960         nla_put_string(msg, IFLA_INFO_KIND, "vlan");
961
962         if (!(data = nla_nest_start(msg, IFLA_INFO_DATA)))
963                 goto nla_put_failure;
964
965         nla_put_u16(msg, IFLA_VLAN_ID, cfg->vid);
966
967 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
968         nla_put_u16(msg, IFLA_VLAN_PROTOCOL, htons(cfg->proto));
969 #else
970         if(cfg->proto == VLAN_PROTO_8021AD)
971                 netifd_log_message(L_WARNING, "%s Your kernel is older than linux 3.10.0, 802.1ad is not supported defaulting to 802.1q", vlandev->type->name);
972 #endif
973
974         nla_nest_end(msg, data);
975         nla_nest_end(msg, linkinfo);
976
977         rv = system_rtnl_call(msg);
978         if (rv)
979                 D(SYSTEM, "Error adding vlandev '%s' over '%s': %d\n", vlandev->ifname, dev->ifname, rv);
980
981         return rv;
982
983 nla_put_failure:
984         nlmsg_free(msg);
985         return -ENOMEM;
986 }
987
988 int system_vlandev_del(struct device *vlandev)
989 {
990         return system_link_del(vlandev->ifname);
991 }
992
993 static void
994 system_if_get_settings(struct device *dev, struct device_settings *s)
995 {
996         struct ifreq ifr;
997         char buf[10];
998
999         memset(&ifr, 0, sizeof(ifr));
1000         strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
1001
1002         if (ioctl(sock_ioctl, SIOCGIFMTU, &ifr) == 0) {
1003                 s->mtu = ifr.ifr_mtu;
1004                 s->flags |= DEV_OPT_MTU;
1005         }
1006
1007         if (ioctl(sock_ioctl, SIOCGIFTXQLEN, &ifr) == 0) {
1008                 s->txqueuelen = ifr.ifr_qlen;
1009                 s->flags |= DEV_OPT_TXQUEUELEN;
1010         }
1011
1012         if (ioctl(sock_ioctl, SIOCGIFHWADDR, &ifr) == 0) {
1013                 memcpy(s->macaddr, &ifr.ifr_hwaddr.sa_data, sizeof(s->macaddr));
1014                 s->flags |= DEV_OPT_MACADDR;
1015         }
1016
1017         if (!system_get_disable_ipv6(dev, buf, sizeof(buf))) {
1018                 s->ipv6 = !strtoul(buf, NULL, 0);
1019                 s->flags |= DEV_OPT_IPV6;
1020         }
1021
1022         if (ioctl(sock_ioctl, SIOCGIFFLAGS, &ifr) == 0) {
1023                 s->promisc = ifr.ifr_flags & IFF_PROMISC;
1024                 s->flags |= DEV_OPT_PROMISC;
1025         }
1026
1027         if (!system_get_rpfilter(dev, buf, sizeof(buf))) {
1028                 s->rpfilter = strtoul(buf, NULL, 0);
1029                 s->flags |= DEV_OPT_RPFILTER;
1030         }
1031
1032         if (!system_get_acceptlocal(dev, buf, sizeof(buf))) {
1033                 s->acceptlocal = strtoul(buf, NULL, 0);
1034                 s->flags |= DEV_OPT_ACCEPTLOCAL;
1035         }
1036
1037         if (!system_get_igmpversion(dev, buf, sizeof(buf))) {
1038                 s->igmpversion = strtoul(buf, NULL, 0);
1039                 s->flags |= DEV_OPT_IGMPVERSION;
1040         }
1041
1042         if (!system_get_mldversion(dev, buf, sizeof(buf))) {
1043                 s->mldversion = strtoul(buf, NULL, 0);
1044                 s->flags |= DEV_OPT_MLDVERSION;
1045         }
1046
1047         if (!system_get_neigh4reachabletime(dev, buf, sizeof(buf))) {
1048                 s->neigh4reachabletime = strtoul(buf, NULL, 0);
1049                 s->flags |= DEV_OPT_NEIGHREACHABLETIME;
1050         }
1051
1052         if (!system_get_neigh6reachabletime(dev, buf, sizeof(buf))) {
1053                 s->neigh6reachabletime = strtoul(buf, NULL, 0);
1054                 s->flags |= DEV_OPT_NEIGHREACHABLETIME;
1055         }
1056 }
1057
1058 void
1059 system_if_apply_settings(struct device *dev, struct device_settings *s, unsigned int apply_mask)
1060 {
1061         struct ifreq ifr;
1062
1063         if (!apply_mask)
1064                 return;
1065
1066         memset(&ifr, 0, sizeof(ifr));
1067         strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
1068         if (s->flags & DEV_OPT_MTU & apply_mask) {
1069                 ifr.ifr_mtu = s->mtu;
1070                 if (ioctl(sock_ioctl, SIOCSIFMTU, &ifr) < 0)
1071                         s->flags &= ~DEV_OPT_MTU;
1072         }
1073         if (s->flags & DEV_OPT_TXQUEUELEN & apply_mask) {
1074                 ifr.ifr_qlen = s->txqueuelen;
1075                 if (ioctl(sock_ioctl, SIOCSIFTXQLEN, &ifr) < 0)
1076                         s->flags &= ~DEV_OPT_TXQUEUELEN;
1077         }
1078         if ((s->flags & DEV_OPT_MACADDR & apply_mask) && !dev->external) {
1079                 ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
1080                 memcpy(&ifr.ifr_hwaddr.sa_data, s->macaddr, sizeof(s->macaddr));
1081                 if (ioctl(sock_ioctl, SIOCSIFHWADDR, &ifr) < 0)
1082                         s->flags &= ~DEV_OPT_MACADDR;
1083         }
1084         if (s->flags & DEV_OPT_IPV6 & apply_mask)
1085                 system_set_disable_ipv6(dev, s->ipv6 ? "0" : "1");
1086         if (s->flags & DEV_OPT_PROMISC & apply_mask) {
1087                 if (system_if_flags(dev->ifname, s->promisc ? IFF_PROMISC : 0,
1088                                     !s->promisc ? IFF_PROMISC : 0) < 0)
1089                         s->flags &= ~DEV_OPT_PROMISC;
1090         }
1091         if (s->flags & DEV_OPT_RPFILTER & apply_mask) {
1092                 char buf[2];
1093
1094                 snprintf(buf, sizeof(buf), "%d", s->rpfilter);
1095                 system_set_rpfilter(dev, buf);
1096         }
1097         if (s->flags & DEV_OPT_ACCEPTLOCAL & apply_mask)
1098                 system_set_acceptlocal(dev, s->acceptlocal ? "1" : "0");
1099         if (s->flags & DEV_OPT_IGMPVERSION & apply_mask) {
1100                 char buf[2];
1101
1102                 snprintf(buf, sizeof(buf), "%d", s->igmpversion);
1103                 system_set_igmpversion(dev, buf);
1104         }
1105         if (s->flags & DEV_OPT_MLDVERSION & apply_mask) {
1106                 char buf[2];
1107
1108                 snprintf(buf, sizeof(buf), "%d", s->mldversion);
1109                 system_set_mldversion(dev, buf);
1110         }
1111         if (s->flags & DEV_OPT_NEIGHREACHABLETIME & apply_mask) {
1112                 char buf[12];
1113
1114                 snprintf(buf, sizeof(buf), "%d", s->neigh4reachabletime);
1115                 system_set_neigh4reachabletime(dev, buf);
1116                 snprintf(buf, sizeof(buf), "%d", s->neigh6reachabletime);
1117                 system_set_neigh6reachabletime(dev, buf);
1118         }
1119 }
1120
1121 int system_if_up(struct device *dev)
1122 {
1123         system_if_get_settings(dev, &dev->orig_settings);
1124         /* Only keep orig settings based on what needs to be set */
1125         dev->orig_settings.flags &= dev->settings.flags;
1126         system_if_apply_settings(dev, &dev->settings, dev->settings.flags);
1127         return system_if_flags(dev->ifname, IFF_UP, 0);
1128 }
1129
1130 int system_if_down(struct device *dev)
1131 {
1132         int ret = system_if_flags(dev->ifname, 0, IFF_UP);
1133         system_if_apply_settings(dev, &dev->orig_settings, dev->orig_settings.flags);
1134         return ret;
1135 }
1136
/* State shared between system_if_check() and its netlink callbacks. */
struct if_check_data {
	/* device whose present/link state the callbacks update */
	struct device *dev;

	/* > 0 while a reply is awaited; set to 0 on ACK or to the netlink error code on failure */
	int pending;

	/* not written by any of the callbacks in this part of the file */
	int ret;
};
1142
1143 #ifndef IFF_LOWER_UP
1144 #define IFF_LOWER_UP    0x10000
1145 #endif
1146
1147 static int cb_if_check_valid(struct nl_msg *msg, void *arg)
1148 {
1149         struct nlmsghdr *nh = nlmsg_hdr(msg);
1150         struct ifinfomsg *ifi = NLMSG_DATA(nh);
1151         struct if_check_data *chk = (struct if_check_data *)arg;
1152
1153         if (nh->nlmsg_type != RTM_NEWLINK)
1154                 return NL_SKIP;
1155
1156         device_set_present(chk->dev, ifi->ifi_index > 0 ? true : false);
1157         device_set_link(chk->dev, ifi->ifi_flags & IFF_LOWER_UP ? true : false);
1158
1159         return NL_OK;
1160 }
1161
1162 static int cb_if_check_ack(struct nl_msg *msg, void *arg)
1163 {
1164         struct if_check_data *chk = (struct if_check_data *)arg;
1165         chk->pending = 0;
1166         return NL_STOP;
1167 }
1168
1169 static int cb_if_check_error(struct sockaddr_nl *nla, struct nlmsgerr *err, void *arg)
1170 {
1171         struct if_check_data *chk = (struct if_check_data *)arg;
1172
1173         device_set_present(chk->dev, false);
1174         device_set_link(chk->dev, false);
1175         chk->pending = err->error;
1176
1177         return NL_STOP;
1178 }
1179
1180 int system_if_check(struct device *dev)
1181 {
1182         struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
1183         struct nl_msg *msg;
1184         struct ifinfomsg ifi = {
1185                 .ifi_family = AF_UNSPEC,
1186                 .ifi_index = 0,
1187         };
1188         struct if_check_data chk = {
1189                 .dev = dev,
1190                 .pending = 1,
1191         };
1192         int ret = 1;
1193
1194         msg = nlmsg_alloc_simple(RTM_GETLINK, 0);
1195         if (!msg || nlmsg_append(msg, &ifi, sizeof(ifi), 0) ||
1196             nla_put_string(msg, IFLA_IFNAME, dev->ifname))
1197                 goto out;
1198
1199         nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, cb_if_check_valid, &chk);
1200         nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, cb_if_check_ack, &chk);
1201         nl_cb_err(cb, NL_CB_CUSTOM, cb_if_check_error, &chk);
1202
1203         nl_send_auto_complete(sock_rtnl, msg);
1204         while (chk.pending > 0)
1205                 nl_recvmsgs(sock_rtnl, cb);
1206
1207         nlmsg_free(msg);
1208         ret = chk.pending;
1209
1210 out:
1211         nl_cb_put(cb);
1212         return ret;
1213 }
1214
1215 struct device *
1216 system_if_get_parent(struct device *dev)
1217 {
1218         char buf[64], *devname;
1219         int ifindex, iflink, len;
1220         FILE *f;
1221
1222         snprintf(buf, sizeof(buf), "/sys/class/net/%s/iflink", dev->ifname);
1223         f = fopen(buf, "r");
1224         if (!f)
1225                 return NULL;
1226
1227         len = fread(buf, 1, sizeof(buf) - 1, f);
1228         fclose(f);
1229
1230         if (len <= 0)
1231                 return NULL;
1232
1233         buf[len] = 0;
1234         iflink = strtoul(buf, NULL, 0);
1235         ifindex = system_if_resolve(dev);
1236         if (!iflink || iflink == ifindex)
1237                 return NULL;
1238
1239         devname = if_indextoname(iflink, buf);
1240         if (!devname)
1241                 return NULL;
1242
1243         return device_get(devname, true);
1244 }
1245
/*
 * Read the first line of @file (opened relative to @dir_fd) into @buf.
 *
 * At most @len - 1 bytes are read with a single read(); the result is
 * NUL-terminated and cut at the first newline. Returns true if at least one
 * byte was read, false if the file could not be opened, was empty, the read
 * failed, or @len leaves no room for data.
 *
 * The read is retried on EINTR. The byte count is kept in its own variable:
 * reusing @len for the read() result turned the retry into a read of
 * (size_t)(-1 - 1) bytes, which could overflow @buf.
 */
static bool
read_string_file(int dir_fd, const char *file, char *buf, int len)
{
	ssize_t nread;
	char *newline;
	int fd;

	/* need room for at least one data byte plus the terminator */
	if (len <= 1)
		return false;

	fd = openat(dir_fd, file, O_RDONLY);
	if (fd < 0)
		return false;

	do {
		nread = read(fd, buf, len - 1);
	} while (nread < 0 && errno == EINTR);

	close(fd);

	if (nread <= 0)
		return false;

	buf[nread] = 0;

	newline = strchr(buf, '\n');
	if (newline)
		*newline = 0;

	return true;
}
1276
/*
 * Read @file (relative to @dir_fd) via read_string_file() and parse its
 * content with strtoull() (base auto-detected) into *@val.
 *
 * Returns false and leaves *@val untouched when the file could not be read.
 */
static bool
read_uint64_file(int dir_fd, const char *file, uint64_t *val)
{
	char text[64];

	if (!read_string_file(dir_fd, file, text, sizeof(text)))
		return false;

	*val = strtoull(text, NULL, 0);

	return true;
}
1289
/*
 * Short labels for the ethtool link-mode bits reported by this file.
 * Assume advertised flags == supported flags
 */
static const struct {
	uint32_t mask;
	const char *name;
} ethtool_link_modes[] = {
	{ .mask = ADVERTISED_10baseT_Half,   .name = "10H"   },
	{ .mask = ADVERTISED_10baseT_Full,   .name = "10F"   },
	{ .mask = ADVERTISED_100baseT_Half,  .name = "100H"  },
	{ .mask = ADVERTISED_100baseT_Full,  .name = "100F"  },
	{ .mask = ADVERTISED_1000baseT_Half, .name = "1000H" },
	{ .mask = ADVERTISED_1000baseT_Full, .name = "1000F" },
};
1302
1303 static void system_add_link_modes(struct blob_buf *b, __u32 mask)
1304 {
1305         int i;
1306         for (i = 0; i < ARRAY_SIZE(ethtool_link_modes); i++) {
1307                 if (mask & ethtool_link_modes[i].mask)
1308                         blobmsg_add_string(b, NULL, ethtool_link_modes[i].name);
1309         }
1310 }
1311
/*
 * Report whether /sys/class/net/<ifname>/phy80211 exists, i.e. whether
 * stat() on that path succeeds.
 */
bool
system_if_force_external(const char *ifname)
{
	struct stat st;
	char path[64];

	snprintf(path, sizeof(path), "/sys/class/net/%s/phy80211", ifname);

	if (stat(path, &st) != 0)
		return false;

	return true;
}
1321
1322 int
1323 system_if_dump_info(struct device *dev, struct blob_buf *b)
1324 {
1325         struct ethtool_cmd ecmd;
1326         struct ifreq ifr;
1327         char buf[64], *s;
1328         void *c;
1329         int dir_fd;
1330
1331         snprintf(buf, sizeof(buf), "/sys/class/net/%s", dev->ifname);
1332         dir_fd = open(buf, O_DIRECTORY);
1333
1334         memset(&ecmd, 0, sizeof(ecmd));
1335         memset(&ifr, 0, sizeof(ifr));
1336         strcpy(ifr.ifr_name, dev->ifname);
1337         ifr.ifr_data = (caddr_t) &ecmd;
1338         ecmd.cmd = ETHTOOL_GSET;
1339
1340         if (ioctl(sock_ioctl, SIOCETHTOOL, &ifr) == 0) {
1341                 c = blobmsg_open_array(b, "link-advertising");
1342                 system_add_link_modes(b, ecmd.advertising);
1343                 blobmsg_close_array(b, c);
1344
1345                 c = blobmsg_open_array(b, "link-supported");
1346                 system_add_link_modes(b, ecmd.supported);
1347                 blobmsg_close_array(b, c);
1348
1349                 s = blobmsg_alloc_string_buffer(b, "speed", 8);
1350                 snprintf(s, 8, "%d%c", ethtool_cmd_speed(&ecmd),
1351                         ecmd.duplex == DUPLEX_HALF ? 'H' : 'F');
1352                 blobmsg_add_string_buffer(b);
1353         }
1354
1355         close(dir_fd);
1356         return 0;
1357 }
1358
1359 int
1360 system_if_dump_stats(struct device *dev, struct blob_buf *b)
1361 {
1362         const char *const counters[] = {
1363                 "collisions",     "rx_frame_errors",   "tx_compressed",
1364                 "multicast",      "rx_length_errors",  "tx_dropped",
1365                 "rx_bytes",       "rx_missed_errors",  "tx_errors",
1366                 "rx_compressed",  "rx_over_errors",    "tx_fifo_errors",
1367                 "rx_crc_errors",  "rx_packets",        "tx_heartbeat_errors",
1368                 "rx_dropped",     "tx_aborted_errors", "tx_packets",
1369                 "rx_errors",      "tx_bytes",          "tx_window_errors",
1370                 "rx_fifo_errors", "tx_carrier_errors",
1371         };
1372         char buf[64];
1373         int stats_dir;
1374         int i;
1375         uint64_t val = 0;
1376
1377         snprintf(buf, sizeof(buf), "/sys/class/net/%s/statistics", dev->ifname);
1378         stats_dir = open(buf, O_DIRECTORY);
1379         if (stats_dir < 0)
1380                 return -1;
1381
1382         for (i = 0; i < ARRAY_SIZE(counters); i++)
1383                 if (read_uint64_file(stats_dir, counters[i], &val))
1384                         blobmsg_add_u64(b, counters[i], val);
1385
1386         close(stats_dir);
1387         return 0;
1388 }
1389
1390 static int system_addr(struct device *dev, struct device_addr *addr, int cmd)
1391 {
1392         bool v4 = ((addr->flags & DEVADDR_FAMILY) == DEVADDR_INET4);
1393         int alen = v4 ? 4 : 16;
1394         unsigned int flags = 0;
1395         struct ifaddrmsg ifa = {
1396                 .ifa_family = (alen == 4) ? AF_INET : AF_INET6,
1397                 .ifa_prefixlen = addr->mask,
1398                 .ifa_index = dev->ifindex,
1399         };
1400
1401         struct nl_msg *msg;
1402         if (cmd == RTM_NEWADDR)
1403                 flags |= NLM_F_CREATE | NLM_F_REPLACE;
1404
1405         msg = nlmsg_alloc_simple(cmd, flags);
1406         if (!msg)
1407                 return -1;
1408
1409         nlmsg_append(msg, &ifa, sizeof(ifa), 0);
1410         nla_put(msg, IFA_LOCAL, alen, &addr->addr);
1411         if (v4) {
1412                 if (addr->broadcast)
1413                         nla_put_u32(msg, IFA_BROADCAST, addr->broadcast);
1414                 if (addr->point_to_point)
1415                         nla_put_u32(msg, IFA_ADDRESS, addr->point_to_point);
1416         } else {
1417                 time_t now = system_get_rtime();
1418                 struct ifa_cacheinfo cinfo = {0xffffffffU, 0xffffffffU, 0, 0};
1419
1420                 if (addr->preferred_until) {
1421                         int64_t preferred = addr->preferred_until - now;
1422                         if (preferred < 0)
1423                                 preferred = 0;
1424                         else if (preferred > UINT32_MAX)
1425                                 preferred = UINT32_MAX;
1426
1427                         cinfo.ifa_prefered = preferred;
1428                 }
1429
1430                 if (addr->valid_until) {
1431                         int64_t valid = addr->valid_until - now;
1432                         if (valid <= 0)
1433                                 return -1;
1434                         else if (valid > UINT32_MAX)
1435                                 valid = UINT32_MAX;
1436
1437                         cinfo.ifa_valid = valid;
1438                 }
1439
1440                 nla_put(msg, IFA_CACHEINFO, sizeof(cinfo), &cinfo);
1441         }
1442
1443         return system_rtnl_call(msg);
1444 }
1445
/* Add @addr to @dev: system_addr() with RTM_NEWADDR. */
int system_add_address(struct device *dev, struct device_addr *addr)
{
	const int cmd = RTM_NEWADDR;

	return system_addr(dev, addr, cmd);
}
1450
/* Remove @addr from @dev: system_addr() with RTM_DELADDR. */
int system_del_address(struct device *dev, struct device_addr *addr)
{
	const int cmd = RTM_DELADDR;

	return system_addr(dev, addr, cmd);
}
1455
1456 static int system_rt(struct device *dev, struct device_route *route, int cmd)
1457 {
1458         int alen = ((route->flags & DEVADDR_FAMILY) == DEVADDR_INET4) ? 4 : 16;
1459         bool have_gw;
1460         unsigned int flags = 0;
1461
1462         if (alen == 4)
1463                 have_gw = !!route->nexthop.in.s_addr;
1464         else
1465                 have_gw = route->nexthop.in6.s6_addr32[0] ||
1466                         route->nexthop.in6.s6_addr32[1] ||
1467                         route->nexthop.in6.s6_addr32[2] ||
1468                         route->nexthop.in6.s6_addr32[3];
1469
1470         unsigned int table = (route->flags & (DEVROUTE_TABLE | DEVROUTE_SRCTABLE))
1471                         ? route->table : RT_TABLE_MAIN;
1472
1473         struct rtmsg rtm = {
1474                 .rtm_family = (alen == 4) ? AF_INET : AF_INET6,
1475                 .rtm_dst_len = route->mask,
1476                 .rtm_src_len = route->sourcemask,
1477                 .rtm_table = (table < 256) ? table : RT_TABLE_UNSPEC,
1478                 .rtm_protocol = (route->flags & DEVADDR_KERNEL) ? RTPROT_KERNEL : RTPROT_STATIC,
1479                 .rtm_scope = RT_SCOPE_NOWHERE,
1480                 .rtm_type = (cmd == RTM_DELROUTE) ? 0: RTN_UNICAST,
1481                 .rtm_flags = (route->flags & DEVROUTE_ONLINK) ? RTNH_F_ONLINK : 0,
1482         };
1483         struct nl_msg *msg;
1484
1485         if (cmd == RTM_NEWROUTE) {
1486                 flags |= NLM_F_CREATE | NLM_F_REPLACE;
1487
1488                 if (!dev) { // Add null-route
1489                         rtm.rtm_scope = RT_SCOPE_UNIVERSE;
1490                         rtm.rtm_type = RTN_UNREACHABLE;
1491                 }
1492                 else
1493                         rtm.rtm_scope = (have_gw) ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK;
1494         }
1495
1496         if (route->flags & DEVROUTE_TYPE) {
1497                 rtm.rtm_type = route->type;
1498                 if (!(route->flags & (DEVROUTE_TABLE | DEVROUTE_SRCTABLE))) {
1499                         if (rtm.rtm_type == RTN_LOCAL || rtm.rtm_type == RTN_BROADCAST ||
1500                             rtm.rtm_type == RTN_NAT || rtm.rtm_type == RTN_ANYCAST)
1501                                 rtm.rtm_table = RT_TABLE_LOCAL;
1502                 }
1503
1504                 if (rtm.rtm_type == RTN_LOCAL || rtm.rtm_type == RTN_NAT) {
1505                         rtm.rtm_scope = RT_SCOPE_HOST;
1506                 } else if (rtm.rtm_type == RTN_BROADCAST || rtm.rtm_type == RTN_MULTICAST ||
1507                                 rtm.rtm_type == RTN_ANYCAST) {
1508                         rtm.rtm_scope = RT_SCOPE_LINK;
1509                 } else if (rtm.rtm_type == RTN_BLACKHOLE || rtm.rtm_type == RTN_UNREACHABLE ||
1510                                 rtm.rtm_type == RTN_PROHIBIT || rtm.rtm_type == RTN_FAILED_POLICY) {
1511                         rtm.rtm_scope = RT_SCOPE_UNIVERSE;
1512                         dev = NULL;
1513                 }
1514         }
1515
1516         msg = nlmsg_alloc_simple(cmd, flags);
1517         if (!msg)
1518                 return -1;
1519
1520         nlmsg_append(msg, &rtm, sizeof(rtm), 0);
1521
1522         if (route->mask)
1523                 nla_put(msg, RTA_DST, alen, &route->addr);
1524
1525         if (route->sourcemask) {
1526                 if (rtm.rtm_family == AF_INET)
1527                         nla_put(msg, RTA_PREFSRC, alen, &route->source);
1528                 else
1529                         nla_put(msg, RTA_SRC, alen, &route->source);
1530         }
1531
1532         if (route->metric > 0)
1533                 nla_put_u32(msg, RTA_PRIORITY, route->metric);
1534
1535         if (have_gw)
1536                 nla_put(msg, RTA_GATEWAY, alen, &route->nexthop);
1537
1538         if (dev)
1539                 nla_put_u32(msg, RTA_OIF, dev->ifindex);
1540
1541         if (table >= 256)
1542                 nla_put_u32(msg, RTA_TABLE, table);
1543
1544         if (route->flags & DEVROUTE_MTU) {
1545                 struct nlattr *metrics;
1546
1547                 if (!(metrics = nla_nest_start(msg, RTA_METRICS)))
1548                         goto nla_put_failure;
1549
1550                 nla_put_u32(msg, RTAX_MTU, route->mtu);
1551
1552                 nla_nest_end(msg, metrics);
1553         }
1554
1555         return system_rtnl_call(msg);
1556
1557 nla_put_failure:
1558         nlmsg_free(msg);
1559         return -ENOMEM;
1560 }
1561
/* Install @route via @dev: system_rt() with RTM_NEWROUTE. */
int system_add_route(struct device *dev, struct device_route *route)
{
	const int cmd = RTM_NEWROUTE;

	return system_rt(dev, route, cmd);
}
1566
/* Remove @route via @dev: system_rt() with RTM_DELROUTE. */
int system_del_route(struct device *dev, struct device_route *route)
{
	const int cmd = RTM_DELROUTE;

	return system_rt(dev, route, cmd);
}
1571
/*
 * Write "-1" to the IPv4 and IPv6 route flush files under /proc.
 *
 * Best effort: files that cannot be opened are skipped and the result of
 * the write is ignored. Always returns 0.
 */
int system_flush_routes(void)
{
	const char *names[] = {
		"/proc/sys/net/ipv4/route/flush",
		"/proc/sys/net/ipv6/route/flush"
	};
	const int n_names = sizeof(names) / sizeof(names[0]);
	int idx;

	for (idx = 0; idx < n_names; idx++) {
		int fd = open(names[idx], O_WRONLY);

		if (fd >= 0) {
			/* empty body: consumes the result so it does not count as unused */
			if (write(fd, "-1", 2)) {}
			close(fd);
		}
	}

	return 0;
}
1590
/* Resolve a route type string to its id by delegating to system_rtn_aton(). */
bool system_resolve_rt_type(const char *type, unsigned int *id)
{
	bool resolved = system_rtn_aton(type, id);

	return resolved;
}
1595
/*
 * Resolve a routing table given by @name to its numeric id.
 *
 * Tried in this order: a positive number (base auto-detected), the built-in
 * aliases "default", "main", "local" and "prelocal", then a name lookup in
 * /etc/iproute2/rt_tables.
 *
 * Returns true and stores the id in *@id on success. Returns false, leaving
 * *@id untouched, when nothing matched or the result is RT_TABLE_UNSPEC.
 */
bool system_resolve_rt_table(const char *name, unsigned int *id)
{
	unsigned int table = RT_TABLE_UNSPEC;
	unsigned int num;
	char line[128];
	char *tok;
	FILE *fp;

	/* first try to parse table as number */
	num = strtoul(name, &tok, 0);
	if (num > 0 && !*tok) {
		table = num;
	}
	/* handle well known aliases */
	else if (strcmp(name, "default") == 0) {
		table = RT_TABLE_DEFAULT;
	} else if (strcmp(name, "main") == 0) {
		table = RT_TABLE_MAIN;
	} else if (strcmp(name, "local") == 0) {
		table = RT_TABLE_LOCAL;
	} else if (strcmp(name, "prelocal") == 0) {
		table = RT_TABLE_PRELOCAL;
	}
	/* try to look up name in /etc/iproute2/rt_tables */
	else {
		fp = fopen("/etc/iproute2/rt_tables", "r");
		if (fp != NULL) {
			while (fgets(line, sizeof(line) - 1, fp) != NULL) {
				/* each usable line is "<number> <name>"; '#' starts a comment */
				tok = strtok(line, " \t\n");
				if (tok == NULL || *tok == '#')
					continue;

				num = strtoul(tok, NULL, 10);
				tok = strtok(NULL, " \t\n");
				if (tok == NULL || strcmp(tok, name) != 0)
					continue;

				table = num;
				break;
			}

			fclose(fp);
		}
	}

	if (table == RT_TABLE_UNSPEC)
		return false;

	*id = table;
	return true;
}
1643
/* True only when @id is RT_TABLE_MAIN. */
bool system_is_default_rt_table(unsigned int id)
{
	if (id != RT_TABLE_MAIN)
		return false;

	return true;
}
1648
/*
 * Translate a reverse-path filter setting to its numeric value.
 *
 * Accepts "strict" (1), "loose" (2) or a number in the range 0..2 (base
 * auto-detected). Returns true and stores the value in *@id on success;
 * returns false and leaves *@id untouched for anything else.
 *
 * The number is range-checked at full unsigned long width. Checking after
 * narrowing to unsigned int let values such as 4294967296 wrap to 0 and pass
 * on platforms where long is 64 bits wide.
 */
bool system_resolve_rpfilter(const char *filter, unsigned int *id)
{
	unsigned long n;
	char *e;

	if (!strcmp(filter, "strict"))
		n = 1;
	else if (!strcmp(filter, "loose"))
		n = 2;
	else {
		n = strtoul(filter, &e, 0);
		/* reject trailing garbage, empty input and out-of-range values */
		if (*e || e == filter || n > 2)
			return false;
	}

	*id = n;
	return true;
}
1667
1668 static int system_iprule(struct iprule *rule, int cmd)
1669 {
1670         int alen = ((rule->flags & IPRULE_FAMILY) == IPRULE_INET4) ? 4 : 16;
1671
1672         struct nl_msg *msg;
1673         struct rtmsg rtm = {
1674                 .rtm_family = (alen == 4) ? AF_INET : AF_INET6,
1675                 .rtm_protocol = RTPROT_STATIC,
1676                 .rtm_scope = RT_SCOPE_UNIVERSE,
1677                 .rtm_table = RT_TABLE_UNSPEC,
1678                 .rtm_type = RTN_UNSPEC,
1679                 .rtm_flags = 0,
1680         };
1681
1682         if (cmd == RTM_NEWRULE) {
1683                 rtm.rtm_type = RTN_UNICAST;
1684                 rtm.rtm_flags |= NLM_F_REPLACE | NLM_F_EXCL;
1685         }
1686
1687         if (rule->invert)
1688                 rtm.rtm_flags |= FIB_RULE_INVERT;
1689
1690         if (rule->flags & IPRULE_SRC)
1691                 rtm.rtm_src_len = rule->src_mask;
1692
1693         if (rule->flags & IPRULE_DEST)
1694                 rtm.rtm_dst_len = rule->dest_mask;
1695
1696         if (rule->flags & IPRULE_TOS)
1697                 rtm.rtm_tos = rule->tos;
1698
1699         if (rule->flags & IPRULE_LOOKUP) {
1700                 if (rule->lookup < 256)
1701                         rtm.rtm_table = rule->lookup;
1702         }
1703
1704         if (rule->flags & IPRULE_ACTION)
1705                 rtm.rtm_type = rule->action;
1706         else if (rule->flags & IPRULE_GOTO)
1707                 rtm.rtm_type = FR_ACT_GOTO;
1708         else if (!(rule->flags & (IPRULE_LOOKUP | IPRULE_ACTION | IPRULE_GOTO)))
1709                 rtm.rtm_type = FR_ACT_NOP;
1710
1711         msg = nlmsg_alloc_simple(cmd, NLM_F_REQUEST);
1712
1713         if (!msg)
1714                 return -1;
1715
1716         nlmsg_append(msg, &rtm, sizeof(rtm), 0);
1717
1718         if (rule->flags & IPRULE_IN)
1719                 nla_put(msg, FRA_IFNAME, strlen(rule->in_dev) + 1, rule->in_dev);
1720
1721         if (rule->flags & IPRULE_OUT)
1722                 nla_put(msg, FRA_OIFNAME, strlen(rule->out_dev) + 1, rule->out_dev);
1723
1724         if (rule->flags & IPRULE_SRC)
1725                 nla_put(msg, FRA_SRC, alen, &rule->src_addr);
1726
1727         if (rule->flags & IPRULE_DEST)
1728                 nla_put(msg, FRA_DST, alen, &rule->dest_addr);
1729
1730         if (rule->flags & IPRULE_PRIORITY)
1731                 nla_put_u32(msg, FRA_PRIORITY, rule->priority);
1732         else if (cmd == RTM_NEWRULE)
1733                 nla_put_u32(msg, FRA_PRIORITY, rule->order);
1734
1735         if (rule->flags & IPRULE_FWMARK)
1736                 nla_put_u32(msg, FRA_FWMARK, rule->fwmark);
1737
1738         if (rule->flags & IPRULE_FWMASK)
1739                 nla_put_u32(msg, FRA_FWMASK, rule->fwmask);
1740
1741         if (rule->flags & IPRULE_LOOKUP) {
1742                 if (rule->lookup >= 256)
1743                         nla_put_u32(msg, FRA_TABLE, rule->lookup);
1744         }
1745
1746         if (rule->flags & IPRULE_GOTO)
1747                 nla_put_u32(msg, FRA_GOTO, rule->gotoid);
1748
1749         return system_rtnl_call(msg);
1750 }
1751
/* Install @rule: system_iprule() with RTM_NEWRULE. */
int system_add_iprule(struct iprule *rule)
{
	const int cmd = RTM_NEWRULE;

	return system_iprule(rule, cmd);
}
1756
/* Remove @rule: system_iprule() with RTM_DELRULE. */
int system_del_iprule(struct iprule *rule)
{
	const int cmd = RTM_DELRULE;

	return system_iprule(rule, cmd);
}
1761
1762 int system_flush_iprules(void)
1763 {
1764         int rv = 0;
1765         struct iprule rule;
1766
1767         system_if_clear_entries(NULL, RTM_GETRULE, AF_INET);
1768         system_if_clear_entries(NULL, RTM_GETRULE, AF_INET6);
1769
1770         memset(&rule, 0, sizeof(rule));
1771
1772
1773         rule.flags = IPRULE_INET4 | IPRULE_PRIORITY | IPRULE_LOOKUP;
1774
1775         rule.priority = 0;
1776         rule.lookup = RT_TABLE_PRELOCAL;
1777         rv |= system_iprule(&rule, RTM_NEWRULE);
1778
1779         rule.priority = 1;
1780         rule.lookup = RT_TABLE_LOCAL;
1781         rv |= system_iprule(&rule, RTM_NEWRULE);
1782
1783         rule.priority = 32766;
1784         rule.lookup = RT_TABLE_MAIN;
1785         rv |= system_iprule(&rule, RTM_NEWRULE);
1786
1787         rule.priority = 32767;
1788         rule.lookup = RT_TABLE_DEFAULT;
1789         rv |= system_iprule(&rule, RTM_NEWRULE);
1790
1791
1792         rule.flags = IPRULE_INET6 | IPRULE_PRIORITY | IPRULE_LOOKUP;
1793
1794         rule.priority = 0;
1795         rule.lookup = RT_TABLE_PRELOCAL;
1796         rv |= system_iprule(&rule, RTM_NEWRULE);
1797
1798         rule.priority = 1;
1799         rule.lookup = RT_TABLE_LOCAL;
1800         rv |= system_iprule(&rule, RTM_NEWRULE);
1801
1802         rule.priority = 32766;
1803         rule.lookup = RT_TABLE_MAIN;
1804         rv |= system_iprule(&rule, RTM_NEWRULE);
1805
1806         return rv;
1807 }
1808
/* Resolve a rule action string to its id by delegating to system_rtn_aton(). */
bool system_resolve_iprule_action(const char *action, unsigned int *id)
{
	bool resolved = system_rtn_aton(action, id);

	return resolved;
}
1813
/*
 * Return a timestamp in whole seconds.
 *
 * CLOCK_MONOTONIC is preferred; if it cannot be read, the wall-clock time
 * from gettimeofday() is used, and 0 is returned when both fail.
 *
 * The clock is read through the C library's clock_gettime() rather than the
 * raw __NR_clock_gettime syscall: the raw syscall fills the kernel's own
 * timespec layout, which does not match the C library's struct timespec on
 * 32-bit targets that use a 64-bit time_t.
 */
time_t system_get_rtime(void)
{
	struct timespec ts;
	struct timeval tv;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		return ts.tv_sec;

	if (gettimeofday(&tv, NULL) == 0)
		return tv.tv_sec;

	return 0;
}
1827
1828 #ifndef IP_DF
1829 #define IP_DF       0x4000
1830 #endif
1831
1832 static int tunnel_ioctl(const char *name, int cmd, void *p)
1833 {
1834         struct ifreq ifr;
1835
1836         memset(&ifr, 0, sizeof(ifr));
1837         strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1838         ifr.ifr_ifru.ifru_data = p;
1839         return ioctl(sock_ioctl, cmd, &ifr);
1840 }
1841
1842 #ifdef IFLA_IPTUN_MAX
1843 #define IP6_FLOWINFO_TCLASS     htonl(0x0FF00000)
/*
 * Create (or replace) a GRE-style tunnel link via an RTM_NEWLINK request.
 *
 * @name: name of the link to create
 * @kind: value sent as IFLA_INFO_KIND (the callers in this file pass
 *        "gre", "gretap", "ip6gre" or "ip6gretap")
 * @link: ifindex sent as IFLA_GRE_LINK; omitted when 0
 * @tb:   parsed tunnel attributes, indexed by TUNNEL_ATTR_*
 * @v6:   true to parse local/remote as IPv6 and emit the IPv6-only
 *        attributes, false for the IPv4 variant
 *
 * Returns -1 if the message cannot be allocated, -ENOMEM if a nested
 * attribute cannot be started, -EINVAL on an invalid attribute value, and
 * otherwise the result of system_rtnl_call(). The message is freed here
 * only on the failure path; on success it is handed to system_rtnl_call().
 */
static int system_add_gre_tunnel(const char *name, const char *kind,
                                 const unsigned int link, struct blob_attr **tb, bool v6)
{
        struct nl_msg *nlm;
        struct ifinfomsg ifi = { .ifi_family = AF_UNSPEC, };
        struct blob_attr *cur;
        uint32_t ikey = 0, okey = 0, flags = 0, flowinfo = 0;
        uint16_t iflags = 0, oflags = 0;
        uint8_t tos = 0;
        int ret = 0, ttl = 64; /* TTL defaults to 64 when not configured */

        nlm = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE);
        if (!nlm)
                return -1;

        nlmsg_append(nlm, &ifi, sizeof(ifi), 0);
        nla_put_string(nlm, IFLA_IFNAME, name);

        /* IFLA_LINKINFO { IFLA_INFO_KIND, IFLA_INFO_DATA { IFLA_GRE_* } } */
        struct nlattr *linkinfo = nla_nest_start(nlm, IFLA_LINKINFO);
        if (!linkinfo) {
                ret = -ENOMEM;
                goto failure;
        }

        nla_put_string(nlm, IFLA_INFO_KIND, kind);
        struct nlattr *infodata = nla_nest_start(nlm, IFLA_INFO_DATA);
        if (!infodata) {
                ret = -ENOMEM;
                goto failure;
        }

        if (link)
                nla_put_u32(nlm, IFLA_GRE_LINK, link);

        /*
         * No range check here; the value is written as a single byte below.
         * system_add_ip_tunnel() rejects TTLs above 255 before calling us.
         */
        if ((cur = tb[TUNNEL_ATTR_TTL]))
                ttl = blobmsg_get_u32(cur);

        nla_put_u8(nlm, IFLA_GRE_TTL, ttl);

        /* TOS is either the literal string "inherit" or a value parsed by system_tos_aton() */
        if ((cur = tb[TUNNEL_ATTR_TOS])) {
                char *str = blobmsg_get_string(cur);
                if (strcmp(str, "inherit")) {
                        unsigned uval;

                        if (!system_tos_aton(str, &uval)) {
                                ret = -EINVAL;
                                goto failure;
                        }

                        /* IPv6: shift the value into the bits selected by IP6_FLOWINFO_TCLASS */
                        if (v6)
                                flowinfo |= htonl(uval << 20) & IP6_FLOWINFO_TCLASS;
                        else
                                tos = uval;
                } else {
                        /* "inherit": a tunnel flag for IPv6, the TOS value 1 for IPv4 */
                        if (v6)
                                flags |= IP6_TNL_F_USE_ORIG_TCLASS;
                        else
                                tos = 1;
                }
        }

        /*
         * Optional info string, six comma-separated numbers:
         * "ikey,okey,icsum,ocsum,iseqno,oseqno". All six must be present.
         * Non-zero keys / checksum / sequence values enable the matching
         * GRE_KEY / GRE_CSUM / GRE_SEQ bit for the input or output side.
         */
        if ((cur = tb[TUNNEL_ATTR_INFO]) && (blobmsg_type(cur) == BLOBMSG_TYPE_STRING)) {
                uint8_t icsum, ocsum, iseqno, oseqno;
                if (sscanf(blobmsg_get_string(cur), "%u,%u,%hhu,%hhu,%hhu,%hhu",
                        &ikey, &okey, &icsum, &ocsum, &iseqno, &oseqno) < 6) {
                        ret = -EINVAL;
                        goto failure;
                }

                if (ikey)
                        iflags |= GRE_KEY;

                if (okey)
                        oflags |= GRE_KEY;

                if (icsum)
                        iflags |= GRE_CSUM;

                if (ocsum)
                        oflags |= GRE_CSUM;

                if (iseqno)
                        iflags |= GRE_SEQ;

                if (oseqno)
                        oflags |= GRE_SEQ;
        }

        if (v6) {
                /* IPv6 endpoints: local/remote are sent as raw struct in6_addr */
                struct in6_addr in6buf;
                if ((cur = tb[TUNNEL_ATTR_LOCAL])) {
                        if (inet_pton(AF_INET6, blobmsg_data(cur), &in6buf) < 1) {
                                ret = -EINVAL;
                                goto failure;
                        }
                        nla_put(nlm, IFLA_GRE_LOCAL, sizeof(in6buf), &in6buf);
                }

                if ((cur = tb[TUNNEL_ATTR_REMOTE])) {
                        if (inet_pton(AF_INET6, blobmsg_data(cur), &in6buf) < 1) {
                                ret = -EINVAL;
                                goto failure;
                        }
                        nla_put(nlm, IFLA_GRE_REMOTE, sizeof(in6buf), &in6buf);
                }
                /* Encapsulation limit is hard-coded to 4 */
                nla_put_u8(nlm, IFLA_GRE_ENCAP_LIMIT, 4);

                if (flowinfo)
                        nla_put_u32(nlm, IFLA_GRE_FLOWINFO, flowinfo);

                if (flags)
                        nla_put_u32(nlm, IFLA_GRE_FLAGS, flags);
        } else {
                /* IPv4 endpoints: local/remote are sent as raw struct in_addr */
                struct in_addr inbuf;
                bool set_df = true; /* DF is set unless TUNNEL_ATTR_DF says otherwise */

                if ((cur = tb[TUNNEL_ATTR_LOCAL])) {
                        if (inet_pton(AF_INET, blobmsg_data(cur), &inbuf) < 1) {
                                ret = -EINVAL;
                                goto failure;
                        }
                        nla_put(nlm, IFLA_GRE_LOCAL, sizeof(inbuf), &inbuf);
                }

                if ((cur = tb[TUNNEL_ATTR_REMOTE])) {
                        if (inet_pton(AF_INET, blobmsg_data(cur), &inbuf) < 1) {
                                ret = -EINVAL;
                                goto failure;
                        }
                        nla_put(nlm, IFLA_GRE_REMOTE, sizeof(inbuf), &inbuf);

                        /*
                         * Multicast remote: any key that was not configured
                         * explicitly is set to the group address itself.
                         */
                        if (IN_MULTICAST(ntohl(inbuf.s_addr))) {
                                if (!okey) {
                                        okey = inbuf.s_addr;
                                        oflags |= GRE_KEY;
                                }

                                if (!ikey) {
                                        ikey = inbuf.s_addr;
                                        iflags |= GRE_KEY;
                                }
                        }
                }

                if ((cur = tb[TUNNEL_ATTR_DF]))
                        set_df = blobmsg_get_bool(cur);

                /*
                 * A non-zero TTL cannot be combined with disabled path MTU
                 * discovery (DF cleared). Since ttl defaults to 64, clearing
                 * DF requires an explicit TTL of 0.
                 */
                if (ttl && !set_df) {
                        ret = -EINVAL;
                        goto failure;
                }

                nla_put_u8(nlm, IFLA_GRE_PMTUDISC, set_df ? 1 : 0);

                nla_put_u8(nlm, IFLA_GRE_TOS, tos);
        }

        /* Flags and keys are only emitted when non-zero */
        if (oflags)
                nla_put_u16(nlm, IFLA_GRE_OFLAGS, oflags);

        if (iflags)
                nla_put_u16(nlm, IFLA_GRE_IFLAGS, iflags);

        if (okey)
                nla_put_u32(nlm, IFLA_GRE_OKEY, okey);

        if (ikey)
                nla_put_u32(nlm, IFLA_GRE_IKEY, ikey);

        nla_nest_end(nlm, infodata);
        nla_nest_end(nlm, linkinfo);

        return system_rtnl_call(nlm);

failure:
        /* Error exit: the partially built message is released here */
        nlmsg_free(nlm);
        return ret;
}
2023 #endif
2024
2025 static int system_add_proto_tunnel(const char *name, const uint8_t proto, const unsigned int link, struct blob_attr **tb)
2026 {
2027         struct blob_attr *cur;
2028         bool set_df = true;
2029         struct ip_tunnel_parm p  = {
2030                 .link = link,
2031                 .iph = {
2032                         .version = 4,
2033                         .ihl = 5,
2034                         .protocol = proto,
2035                 }
2036         };
2037
2038         if ((cur = tb[TUNNEL_ATTR_LOCAL]) &&
2039                         inet_pton(AF_INET, blobmsg_data(cur), &p.iph.saddr) < 1)
2040                 return -EINVAL;
2041
2042         if ((cur = tb[TUNNEL_ATTR_REMOTE]) &&
2043                         inet_pton(AF_INET, blobmsg_data(cur), &p.iph.daddr) < 1)
2044                 return -EINVAL;
2045
2046         if ((cur = tb[TUNNEL_ATTR_DF]))
2047                 set_df = blobmsg_get_bool(cur);
2048
2049         if ((cur = tb[TUNNEL_ATTR_TTL]))
2050                 p.iph.ttl = blobmsg_get_u32(cur);
2051
2052         if ((cur = tb[TUNNEL_ATTR_TOS])) {
2053                 char *str = blobmsg_get_string(cur);
2054                 if (strcmp(str, "inherit")) {
2055                         unsigned uval;
2056
2057                         if (!system_tos_aton(str, &uval))
2058                                 return -EINVAL;
2059
2060                         p.iph.tos = uval;
2061                 } else
2062                         p.iph.tos = 1;
2063         }
2064
2065         p.iph.frag_off = set_df ? htons(IP_DF) : 0;
2066         /* ttl !=0 and nopmtudisc are incompatible */
2067         if (p.iph.ttl && p.iph.frag_off == 0)
2068                 return -EINVAL;
2069
2070         strncpy(p.name, name, sizeof(p.name));
2071
2072         switch (p.iph.protocol) {
2073         case IPPROTO_IPIP:
2074                 return tunnel_ioctl("tunl0", SIOCADDTUNNEL, &p);
2075         case IPPROTO_IPV6:
2076                 return tunnel_ioctl("sit0", SIOCADDTUNNEL, &p);
2077         default:
2078                 break;
2079         }
2080         return -1;
2081 }
2082
2083 static int __system_del_ip_tunnel(const char *name, struct blob_attr **tb)
2084 {
2085         struct blob_attr *cur;
2086         const char *str;
2087
2088         if (!(cur = tb[TUNNEL_ATTR_TYPE]))
2089                 return -EINVAL;
2090         str = blobmsg_data(cur);
2091
2092         if (!strcmp(str, "greip") || !strcmp(str, "gretapip") ||
2093             !strcmp(str, "greip6") || !strcmp(str, "gretapip6"))
2094                 return system_link_del(name);
2095         else
2096                 return tunnel_ioctl(name, SIOCDELTUNNEL, NULL);
2097 }
2098
2099 int system_del_ip_tunnel(const char *name, struct blob_attr *attr)
2100 {
2101         struct blob_attr *tb[__TUNNEL_ATTR_MAX];
2102
2103         blobmsg_parse(tunnel_attr_list.params, __TUNNEL_ATTR_MAX, tb,
2104                 blob_data(attr), blob_len(attr));
2105
2106         return __system_del_ip_tunnel(name, tb);
2107 }
2108
2109 int system_update_ipv6_mtu(struct device *dev, int mtu)
2110 {
2111         int ret = -1;
2112         char buf[64];
2113         snprintf(buf, sizeof(buf), "/proc/sys/net/ipv6/conf/%s/mtu",
2114                         dev->ifname);
2115
2116         int fd = open(buf, O_RDWR);
2117         ssize_t len = read(fd, buf, sizeof(buf) - 1);
2118         if (len < 0)
2119                 goto out;
2120
2121         buf[len] = 0;
2122         ret = atoi(buf);
2123
2124         if (!mtu || ret <= mtu)
2125                 goto out;
2126
2127         lseek(fd, 0, SEEK_SET);
2128         if (write(fd, buf, snprintf(buf, sizeof(buf), "%i", mtu)) <= 0)
2129                 ret = -1;
2130
2131 out:
2132         close(fd);
2133         return ret;
2134 }
2135
2136 int system_add_ip_tunnel(const char *name, struct blob_attr *attr)
2137 {
2138         struct blob_attr *tb[__TUNNEL_ATTR_MAX];
2139         struct blob_attr *cur;
2140         const char *str;
2141
2142         blobmsg_parse(tunnel_attr_list.params, __TUNNEL_ATTR_MAX, tb,
2143                 blob_data(attr), blob_len(attr));
2144
2145         __system_del_ip_tunnel(name, tb);
2146
2147         if (!(cur = tb[TUNNEL_ATTR_TYPE]))
2148                 return -EINVAL;
2149         str = blobmsg_data(cur);
2150
2151         unsigned int ttl = 0;
2152         if ((cur = tb[TUNNEL_ATTR_TTL])) {
2153                 ttl = blobmsg_get_u32(cur);
2154                 if (ttl > 255)
2155                         return -EINVAL;
2156         }
2157
2158         unsigned int link = 0;
2159         if ((cur = tb[TUNNEL_ATTR_LINK])) {
2160                 struct interface *iface = vlist_find(&interfaces, blobmsg_data(cur), iface, node);
2161                 if (!iface)
2162                         return -EINVAL;
2163
2164                 if (iface->l3_dev.dev)
2165                         link = iface->l3_dev.dev->ifindex;
2166         }
2167
2168         if (!strcmp(str, "sit")) {
2169                 if (system_add_proto_tunnel(name, IPPROTO_IPV6, link, tb) < 0)
2170                         return -1;
2171
2172 #ifdef SIOCADD6RD
2173                 if ((cur = tb[TUNNEL_ATTR_6RD_PREFIX])) {
2174                         unsigned int mask;
2175                         struct ip_tunnel_6rd p6;
2176
2177                         memset(&p6, 0, sizeof(p6));
2178
2179                         if (!parse_ip_and_netmask(AF_INET6, blobmsg_data(cur),
2180                                                 &p6.prefix, &mask) || mask > 128)
2181                                 return -EINVAL;
2182                         p6.prefixlen = mask;
2183
2184                         if ((cur = tb[TUNNEL_ATTR_6RD_RELAY_PREFIX])) {
2185                                 if (!parse_ip_and_netmask(AF_INET, blobmsg_data(cur),
2186                                                         &p6.relay_prefix, &mask) || mask > 32)
2187                                         return -EINVAL;
2188                                 p6.relay_prefixlen = mask;
2189                         }
2190
2191                         if (tunnel_ioctl(name, SIOCADD6RD, &p6) < 0) {
2192                                 __system_del_ip_tunnel(name, tb);
2193                                 return -1;
2194                         }
2195                 }
2196 #endif
2197 #ifdef IFLA_IPTUN_MAX
2198         } else if (!strcmp(str, "ipip6")) {
2199                 struct nl_msg *nlm = nlmsg_alloc_simple(RTM_NEWLINK,
2200                                 NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE);
2201                 struct ifinfomsg ifi = { .ifi_family = AF_UNSPEC };
2202                 int ret = 0;
2203
2204                 if (!nlm)
2205                         return -1;
2206
2207                 nlmsg_append(nlm, &ifi, sizeof(ifi), 0);
2208                 nla_put_string(nlm, IFLA_IFNAME, name);
2209
2210                 if (link)
2211                         nla_put_u32(nlm, IFLA_LINK, link);
2212
2213                 struct nlattr *linkinfo = nla_nest_start(nlm, IFLA_LINKINFO);
2214                 if (!linkinfo) {
2215                         ret = -ENOMEM;
2216                         goto failure;
2217                 }
2218                 nla_put_string(nlm, IFLA_INFO_KIND, "ip6tnl");
2219                 struct nlattr *infodata = nla_nest_start(nlm, IFLA_INFO_DATA);
2220                 if (!infodata) {
2221                         ret = -ENOMEM;
2222                         goto failure;
2223                 }
2224
2225                 if (link)
2226                         nla_put_u32(nlm, IFLA_IPTUN_LINK, link);
2227
2228                 nla_put_u8(nlm, IFLA_IPTUN_PROTO, IPPROTO_IPIP);
2229                 nla_put_u8(nlm, IFLA_IPTUN_TTL, (ttl) ? ttl : 64);
2230                 nla_put_u8(nlm, IFLA_IPTUN_ENCAP_LIMIT, 4);
2231
2232                 struct in6_addr in6buf;
2233                 if ((cur = tb[TUNNEL_ATTR_LOCAL])) {
2234                         if (inet_pton(AF_INET6, blobmsg_data(cur), &in6buf) < 1) {
2235                                 ret = -EINVAL;
2236                                 goto failure;
2237                         }
2238                         nla_put(nlm, IFLA_IPTUN_LOCAL, sizeof(in6buf), &in6buf);
2239                 }
2240
2241                 if ((cur = tb[TUNNEL_ATTR_REMOTE])) {
2242                         if (inet_pton(AF_INET6, blobmsg_data(cur), &in6buf) < 1) {
2243                                 ret = -EINVAL;
2244                                 goto failure;
2245                         }
2246                         nla_put(nlm, IFLA_IPTUN_REMOTE, sizeof(in6buf), &in6buf);
2247                 }
2248
2249 #ifdef IFLA_IPTUN_FMR_MAX
2250                 if ((cur = tb[TUNNEL_ATTR_FMRS])) {
2251                         struct nlattr *fmrs = nla_nest_start(nlm, IFLA_IPTUN_FMRS);
2252
2253                         struct blob_attr *fmr;
2254                         unsigned rem, fmrcnt = 0;
2255                         blobmsg_for_each_attr(fmr, cur, rem) {
2256                                 if (blobmsg_type(fmr) != BLOBMSG_TYPE_STRING)
2257                                         continue;
2258
2259                                 unsigned ip4len, ip6len, ealen, offset = 6;
2260                                 char ip6buf[48];
2261                                 char ip4buf[16];
2262
2263                                 if (sscanf(blobmsg_get_string(fmr), "%47[^/]/%u,%15[^/]/%u,%u,%u",
2264                                                 ip6buf, &ip6len, ip4buf, &ip4len, &ealen, &offset) < 5) {
2265                                         ret = -EINVAL;
2266                                         goto failure;
2267                                 }
2268
2269                                 struct in6_addr ip6prefix;
2270                                 struct in_addr ip4prefix;
2271                                 if (inet_pton(AF_INET6, ip6buf, &ip6prefix) != 1 ||
2272                                                 inet_pton(AF_INET, ip4buf, &ip4prefix) != 1) {
2273                                         ret = -EINVAL;
2274                                         goto failure;
2275                                 }
2276
2277                                 struct nlattr *rule = nla_nest_start(nlm, ++fmrcnt);
2278
2279                                 nla_put(nlm, IFLA_IPTUN_FMR_IP6_PREFIX, sizeof(ip6prefix), &ip6prefix);
2280                                 nla_put(nlm, IFLA_IPTUN_FMR_IP4_PREFIX, sizeof(ip4prefix), &ip4prefix);
2281                                 nla_put_u8(nlm, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, ip6len);
2282                                 nla_put_u8(nlm, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, ip4len);
2283                                 nla_put_u8(nlm, IFLA_IPTUN_FMR_EA_LEN, ealen);
2284                                 nla_put_u8(nlm, IFLA_IPTUN_FMR_OFFSET, offset);
2285
2286                                 nla_nest_end(nlm, rule);
2287                         }
2288
2289                         nla_nest_end(nlm, fmrs);
2290                 }
2291 #endif
2292
2293                 nla_nest_end(nlm, infodata);
2294                 nla_nest_end(nlm, linkinfo);
2295
2296                 return system_rtnl_call(nlm);
2297 failure:
2298                 nlmsg_free(nlm);
2299                 return ret;
2300         } else if (!strcmp(str, "greip")) {
2301                 return system_add_gre_tunnel(name, "gre", link, tb, false);
2302         } else if (!strcmp(str, "gretapip"))  {
2303                 return system_add_gre_tunnel(name, "gretap", link, tb, false);
2304         } else if (!strcmp(str, "greip6")) {
2305                 return system_add_gre_tunnel(name, "ip6gre", link, tb, true);
2306         } else if (!strcmp(str, "gretapip6")) {
2307                 return system_add_gre_tunnel(name, "ip6gretap", link, tb, true);
2308 #endif
2309         } else if (!strcmp(str, "ipip")) {
2310                 return system_add_proto_tunnel(name, IPPROTO_IPIP, link, tb);
2311         }
2312         else
2313                 return -EINVAL;
2314
2315         return 0;
2316 }