ce66adadb4eedd5e322ea56d630c5041b21a1ec8
[project/luci.git] / contrib / package / freifunk-watchdog / src / watchdog.c
1 /*
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the Free Software
14  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
15  *
16  *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
17  */
18
19 #include "watchdog.h"
20
21 /* Global watchdog device fd (-1 while no device is open); kept at file scope because shutdown_watchdog(), which also runs as a signal handler, has to reach it */
22 int wdfd = -1;
23
24 /* Handle finished childs */
25 static void sigchld_handler(int sig)
26 {
27         pid_t pid;
28
29         while( (pid = waitpid(-1, NULL, WNOHANG)) > 0 )
30                 syslog(LOG_INFO, "Child returned (pid %d)", pid);
31 }
32
33 /* Watchdog shutdown helper */
34 static void shutdown_watchdog(int sig)
35 {
36         static const char wshutdown = WATCH_SHUTDOWN;
37
38         if( wdfd > -1 )
39         {
40                 syslog(LOG_INFO, "Stopping watchdog timer");
41                 write(wdfd, &wshutdown, 1);
42                 close(wdfd);
43                 wdfd = -1;
44         }
45
46         exit(0);
47 }
48
49 /* Get BSSID of given interface */
50 static int iw_get_bssid(int iwfd, const char *ifname, char *bssid)
51 {
52         struct iwreq iwrq;
53
54         if( iw_ioctl(iwfd, ifname, SIOCGIWAP, &iwrq) >= 0 )
55         {
56                 unsigned char *addr = (unsigned char *)iwrq.u.ap_addr.sa_data;
57
58                 sprintf(bssid, "%02X:%02X:%02X:%02X:%02X:%02X",
59                         addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
60
61                 return 0;
62         }
63
64         return -1;
65 }
66
67 /* Get channel of given interface */
68 static int iw_get_channel(int iwfd, const char *ifname, int *channel)
69 {
70         int i;
71         char buffer[sizeof(struct iw_range)];
72         double cur_freq, cmp_freq;
73         struct iwreq iwrq;
74         struct iw_range *range;
75
76         memset(buffer, 0, sizeof(buffer));
77
78         iwrq.u.data.pointer = (char *)buffer;
79         iwrq.u.data.length = sizeof(buffer);
80         iwrq.u.data.flags = 0;
81
82         if( iw_ioctl(iwfd, ifname, SIOCGIWRANGE, &iwrq) < 0)
83         {
84                 *channel = -1;
85                 return -1;
86         }
87
88         range = (struct iw_range *)buffer;
89
90         if( iw_ioctl(iwfd, ifname, SIOCGIWFREQ, &iwrq) >= 0 )
91         {
92                 cur_freq = ((double)iwrq.u.freq.m) * pow(10, iwrq.u.freq.e);
93                 if( cur_freq < 1000.00 )
94                 {
95                         *channel = (int)cur_freq;
96                         return 0;
97                 }
98
99                 for(i = 0; i < range->num_frequency; i++)
100                 {
101                         cmp_freq = ((double)range->freq[i].m) * pow(10, range->freq[i].e);
102                         if( cmp_freq == cur_freq )
103                         {
104                                 *channel = (int)range->freq[i].i;
105                                 return 0;
106                         }
107                 }
108         }
109
110         *channel = -1;
111         return -1;
112 }
113
/*
 * Get the (first) pid of a process with the given name.
 *
 * Every numeric entry below /proc is visited and the beginning of its
 * "status" file is compared against "Name:\t<name>\n". The calling
 * process itself is skipped.
 *
 * The status data is read with one byte held back and NUL-terminated
 * before it is compared, and the path is built with a bounded snprintf().
 *
 * NOTE(review): per proc(5) the kernel truncates the name shown in the
 * status file, so long process names may never match - confirm that the
 * monitored names are short enough.
 *
 * name: process name to look for
 *
 * Returns the pid of the first match, or -1 if there is none or /proc
 * cannot be opened (the latter is logged).
 */
static int find_process(const char *name)
{
        int pid = -1;
        int file;
        int n;
        ssize_t len;
        size_t cmplen;
        char path[64];
        char buffer[128];
        char cmpname[128];
        DIR *dir;
        struct dirent *entry;

        if( (dir = opendir("/proc")) == NULL )
        {
                syslog(LOG_CRIT, "Unable to open /proc: %s",
                        strerror(errno));

                return -1;
        }

        snprintf(cmpname, sizeof(cmpname), "Name:\t%s\n", name);
        cmplen = strlen(cmpname);

        while( (entry = readdir(dir)) != NULL )
        {
                /* Only entries starting with a digit are of interest */
                if( !isdigit((unsigned char)entry->d_name[0]) )
                        continue;

                n = snprintf(path, sizeof(path), "/proc/%s/status", entry->d_name);
                if( n < 0 || (size_t)n >= sizeof(path) )
                        continue;

                if( (file = open(path, O_RDONLY)) < 0 )
                        continue;

                len = read(file, buffer, sizeof(buffer) - 1);
                close(file);

                if( len <= 0 )
                        continue;

                buffer[len] = '\0';

                /* The name line has to be the very first thing in the file */
                if( strncmp(buffer, cmpname, cmplen) != 0 )
                        continue;

                pid = atoi(entry->d_name);

                /* Skip myself ... */
                if( pid != getpid() )
                        break;

                pid = -1;
        }

        closedir(dir);
        return pid;
}
161
/*
 * Get the 5 minute load average.
 *
 * Reads the beginning of /proc/loadavg and parses the second
 * space-separated field (per proc(5): the 5 minute average). The field is
 * located by searching for the separator instead of assuming that the
 * first field is exactly four characters wide, and the data is
 * NUL-terminated before it is parsed.
 *
 * Returns the parsed value, or 0.00 if the file cannot be opened or read,
 * or holds no second field.
 */
static double find_loadavg(void)
{
        int fd;
        ssize_t len;
        char buffer[64];
        const char *field;

        if( (fd = open("/proc/loadavg", O_RDONLY)) < 0 )
                return 0.00;

        len = read(fd, buffer, sizeof(buffer) - 1);
        close(fd);

        if( len <= 0 )
                return 0.00;

        buffer[len] = '\0';

        /* Step over the first field, whatever its width */
        if( (field = strchr(buffer, ' ')) == NULL )
                return 0.00;

        /* strtod() skips the separator and yields 0 if nothing converts */
        return strtod(field, NULL);
}
179
/*
 * Check whether the state file of a uci config is newer than last seen.
 *
 * The file examined is /var/state/<config>.
 *
 * config: name of the config, used as the file name below /var/state
 * mtime:  in/out; modification time recorded by the previous call, or 0
 *         if none was recorded yet. Updated whenever 1 is returned.
 *
 * Returns 1 if the file exists and either nothing was recorded so far or
 * the file is newer than the recorded time; 0 otherwise (including when
 * the file cannot be stat()ed).
 */
static int check_uci_update(const char *config, time_t *mtime)
{
        char state_file[128];
        struct stat st;

        snprintf(state_file, sizeof(state_file), "/var/state/%s", config);

        if( stat(state_file, &st) < 0 )
                return 0;

        /* Already seen this version of the file */
        if( (*mtime != 0) && (st.st_mtime <= *mtime) )
                return 0;

        *mtime = st.st_mtime;
        return 1;
}
198
199 /* Add tuple */
200 static void load_wifi_uci_add_iface(const char *section, struct uci_wifi_iface_itr_ctx *itr)
201 {
202         wifi_tuple_t *t;
203         const char *ucitmp;
204         int val = 0;
205
206         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "mode");
207         if( ucitmp && !strncmp(ucitmp, "adhoc", 5) )
208         {
209                 if( (t = (wifi_tuple_t *)malloc(sizeof(wifi_tuple_t))) != NULL )
210                 {
211                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "ifname");
212                         if(ucitmp)
213                         {
214                                 strncpy(t->ifname, ucitmp, sizeof(t->ifname));
215                                 val++;
216                         }
217
218                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "bssid");
219                         if(ucitmp)
220                         {
221                                 strncpy(t->bssid, ucitmp, sizeof(t->bssid));
222                                 val++;
223                         }
224
225                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "device");
226                         if(ucitmp)
227                         {
228                                 ucitmp = ucix_get_option(itr->ctx, "wireless", ucitmp, "channel");
229                                 if(ucitmp)
230                                 {
231                                         t->channel = atoi(ucitmp);
232                                         val++;
233                                 }
234                         }
235
236                         if( val == 3 )
237                         {
238                                 syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
239                                         t->ifname, t->bssid, t->channel);
240
241                                 t->next = itr->list;
242                                 itr->list = t;
243                         }
244                         else
245                         {
246                                 free(t);
247                         }
248                 }
249         }
250 }
251
252 /* Load config */
253 static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
254 {
255         struct uci_context *ctx;
256         struct uci_wifi_iface_itr_ctx itr;
257         wifi_tuple_t *cur, *next;
258
259         if( check_uci_update("wireless", modtime) )
260         {
261                 syslog(LOG_INFO, "Wireless config changed, reloading");
262
263                 if( (ctx = ucix_init("wireless")) != NULL )
264                 {
265                         if( ifs != NULL )
266                         {
267                                 for(cur = ifs; cur; cur = next)
268                                 {
269                                         next = cur->next;
270                                         free(cur);
271                                 }
272                         }
273
274                         itr.list = NULL;
275                         itr.ctx = ctx;
276
277                         ucix_for_each_section_type(ctx, "wireless", "wifi-iface",
278                                 (void *)load_wifi_uci_add_iface, &itr);
279
280                         return itr.list;
281                 }
282         }
283
284         return ifs;
285 }
286
287 /* Add tuple */
288 static void load_watchdog_uci_add_process(const char *section, struct uci_process_itr_ctx *itr)
289 {
290         process_tuple_t *t;
291         const char *ucitmp;
292         int val = 0;
293
294         if( (t = (process_tuple_t *)malloc(sizeof(process_tuple_t))) != NULL )
295         {
296                 t->restart = 0;
297
298                 ucitmp = ucix_get_option(itr->ctx, "freifunk-watchdog", section, "process");
299                 if(ucitmp)
300                 {
301                         strncpy(t->process, ucitmp, sizeof(t->process));
302                         val++;
303                 }
304
305                 ucitmp = ucix_get_option(itr->ctx, "freifunk-watchdog", section, "initscript");
306                 if(ucitmp)
307                 {
308                         strncpy(t->initscript, ucitmp, sizeof(t->initscript));
309                         val++;
310                 }
311
312                 if( val == 2 )
313                 {
314                         syslog(LOG_INFO, "Monitoring %s: initscript=%s",
315                                 t->process, t->initscript);
316
317                                 t->next = itr->list;
318                                 itr->list = t;
319                 }
320                 else
321                 {
322                         free(t);
323                 }
324         }
325 }
326
327 /* Load config */
328 static process_tuple_t * load_watchdog_uci(process_tuple_t *procs)
329 {
330         struct uci_context *ctx;
331         struct uci_process_itr_ctx itr;
332         process_tuple_t *cur, *next;
333
334         syslog(LOG_INFO, "Loading watchdog config");
335
336         if( (ctx = ucix_init("freifunk-watchdog")) != NULL )
337         {
338                 if( procs != NULL )
339                 {
340                         for(cur = procs; cur; cur = next)
341                         {
342                                 next = cur->next;
343                                 free(cur);
344                         }
345                 }
346
347                 itr.list = NULL;
348                 itr.ctx = ctx;
349
350                 ucix_for_each_section_type(ctx, "freifunk-watchdog", "process",
351                         (void *)load_watchdog_uci_add_process, &itr);
352
353                 return itr.list;
354         }
355
356         return procs;
357 }
358
359 /* Daemon implementation */
360 static int do_daemon(void)
361 {
362         static int wdtrigger = 1;
363         static int wdtimeout = BASE_INTERVAL * 2;
364         static const char wdkeepalive = WATCH_KEEPALIVE;
365
366         int iwfd;
367         int channel;
368         char bssid[18];
369         struct sigaction sa;
370
371         wifi_tuple_t *ifs = NULL, *curr_if;
372         process_tuple_t *procs = NULL, *curr_proc;
373         time_t wireless_modtime = 0;
374
375         int action_intv = 0;
376         int restart_wifi = 0;
377         int loadavg_panic = 0;
378
379         openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
380         memset(&sa, 0, sizeof(sa));
381
382         if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
383         {
384                 syslog(LOG_ERR, "Can not open wireless control socket: %s",
385                         strerror(errno));
386
387                 return 1;
388         }
389
390         if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
391         {
392                 syslog(LOG_INFO, "Opened %s - polling every %i seconds",
393                         WATCH_DEVICE, BASE_INTERVAL);
394
395                 /* Install signal handler to halt watchdog on shutdown */
396                 sa.sa_handler = shutdown_watchdog;
397                 sa.sa_flags = SA_NOCLDWAIT | SA_RESTART;
398                 sigaction(SIGHUP,  &sa, NULL);
399                 sigaction(SIGINT,  &sa, NULL);
400                 sigaction(SIGPIPE, &sa, NULL);
401                 sigaction(SIGTERM, &sa, NULL);
402                 sigaction(SIGUSR1, &sa, NULL);
403                 sigaction(SIGUSR2, &sa, NULL);
404
405                 /* Set watchdog timeout to twice the interval */
406                 ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtimeout);
407         }
408
409         /* Install signal handler to reap childs */
410         sa.sa_handler = sigchld_handler;
411         sa.sa_flags = 0;
412         sigaction(SIGCHLD, &sa, NULL);
413
414         /* Load watchdog configuration only once */
415         procs = load_watchdog_uci(procs);
416
417         while( 1 )
418         {
419                 /* Check/increment action interval */
420                 if( ++action_intv >= ACTION_INTERVAL )
421                 {
422                         /* Reset action interval */
423                         action_intv = 0;
424
425                         /* Check average load */
426                         if( find_loadavg() >= LOAD_TRESHOLD )
427                                 loadavg_panic++;
428                         else
429                                 loadavg_panic = 0;
430
431                         /* Check wireless interfaces */
432                         ifs = load_wifi_uci(ifs, &wireless_modtime);
433                         for( curr_if = ifs; curr_if; curr_if = curr_if->next )
434                         {
435                                 /* Get current channel and bssid */
436                                 if( (iw_get_bssid(iwfd, curr_if->ifname, bssid) == 0) &&
437                             (iw_get_channel(iwfd, curr_if->ifname, &channel) == 0) )
438                                 {
439                                         /* Check BSSID */
440                                         if( strcasecmp(bssid, curr_if->bssid) != 0 )
441                                         {
442                                                 syslog(LOG_WARNING, "BSSID mismatch on %s: current=%s wanted=%s",
443                                                         curr_if->ifname, bssid, curr_if->bssid);
444
445                                                 restart_wifi++;
446                                         }
447
448                                         /* Check channel */
449                                         else if( channel != curr_if->channel )
450                                         {
451                                                 syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
452                                                         curr_if->ifname, channel, curr_if->channel);
453
454                                                 restart_wifi++;
455                                         }
456                                 }
457                                 else
458                                 {
459                                         syslog(LOG_WARNING, "Requested interface %s not present", curr_if->ifname);
460                                 }
461                         }
462
463                         /* Check processes */
464                         for( curr_proc = procs; curr_proc; curr_proc = curr_proc->next )
465                         {
466                                 if( find_process(curr_proc->process) < 0 )
467                                         curr_proc->restart++;
468                                 else
469                                         curr_proc->restart = 0;
470
471                                 /* Process restart required? */
472                                 if( curr_proc->restart >= HYSTERESIS )
473                                 {
474                                         curr_proc->restart = 0;
475                                         syslog(LOG_WARNING, "The %s process died, restarting", curr_proc->process);
476                                         EXEC(PROC_ACTION);
477                                 }
478                         }
479
480
481                         /* Wifi restart required? */
482                         if( restart_wifi >= HYSTERESIS )
483                         {
484                                 restart_wifi = 0;
485                                 syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
486                                 EXEC(WIFI_ACTION);
487                         }
488
489                         /* Is there a load problem? */
490                         if( loadavg_panic >= HYSTERESIS )
491                         {
492                                 syslog(LOG_EMERG, "Critical system load level, triggering reset!");
493
494                                 /* Try watchdog, fall back to reboot */
495                                 if( wdfd > -1 )
496                                         ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger);
497                                 else
498                                         EXEC(LOAD_ACTION);
499                         }
500                 }
501
502
503                 /* Reset watchdog timer */
504                 if( wdfd > -1 )
505                         write(wdfd, &wdkeepalive, 1);
506
507                 sleep(BASE_INTERVAL);
508         }
509
510         shutdown_watchdog(0);
511         closelog();
512
513         return 0;
514 }
515
516
517 int main(int argc, char *argv[])
518 {
519         /* Check if watchdog is running ... */
520         if( (argc > 1) && (strcmp(argv[1], "running") == 0) )
521         {
522                 return (find_process(BINARY) == -1);
523         }
524
525         /* Start daemon */
526         return do_daemon();
527 }