contrib/package/freifunk-watchdog:
[project/luci.git] / contrib / package / freifunk-watchdog / src / watchdog.c
1 /*
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the Free Software
14  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
15  *
16  *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
17  */
18
19 #include "watchdog.h"
20
21 /*
 * Global watchdog fd, required by the signal handler.
 *
 * Holds the descriptor of the opened watchdog device, or -1 while no device
 * is open. shutdown_watchdog() reads it and resets it to -1 after closing.
 */
22 int wdfd = -1;
23
24 /* Watchdog shutdown helper */
25 static void shutdown_watchdog(int sig)
26 {
27         static int wdelay = 3600;
28         static const char wshutdown = WATCH_SHUTDOWN;
29
30         if( wdfd > -1 )
31         {
32                 syslog(LOG_INFO, "Stopping watchdog timer");
33                 write(wdfd, &wshutdown, 1);
34                 close(wdfd);
35                 wdfd = -1;
36         }
37
38         exit(0);
39 }
40
41 /* Get BSSID of given interface */
42 static int iw_get_bssid(int iwfd, const char *ifname, char *bssid)
43 {
44         struct iwreq iwrq;
45
46         if( iw_ioctl(iwfd, ifname, SIOCGIWAP, &iwrq) >= 0 )
47         {
48                 unsigned char *addr = (unsigned char *)iwrq.u.ap_addr.sa_data;
49
50                 sprintf(bssid, "%02X:%02X:%02X:%02X:%02X:%02X",
51                         addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
52
53                 return 0;
54         }
55
56         return -1;
57 }
58
59 /* Get channel of given interface */
60 static int iw_get_channel(int iwfd, const char *ifname, int *channel)
61 {
62         int i;
63         char buffer[sizeof(struct iw_range)];
64         double cur_freq, cmp_freq;
65         struct iwreq iwrq;
66         struct iw_range *range;
67
68         memset(buffer, 0, sizeof(buffer));
69
70         iwrq.u.data.pointer = (char *)buffer;
71         iwrq.u.data.length = sizeof(buffer);
72         iwrq.u.data.flags = 0;
73
74         if( iw_ioctl(iwfd, ifname, SIOCGIWRANGE, &iwrq) < 0)
75         {
76                 *channel = -1;
77                 return -1;
78         }
79
80         range = (struct iw_range *)buffer;
81
82         if( iw_ioctl(iwfd, ifname, SIOCGIWFREQ, &iwrq) >= 0 )
83         {
84                 cur_freq = ((double)iwrq.u.freq.m) * pow(10, iwrq.u.freq.e);
85                 if( cur_freq < 1000.00 )
86                 {
87                         *channel = (int)cur_freq;
88                         return 0;
89                 }
90
91                 for(i = 0; i < range->num_frequency; i++)
92                 {
93                         cmp_freq = ((double)range->freq[i].m) * pow(10, range->freq[i].e);
94                         if( cmp_freq == cur_freq )
95                         {
96                                 *channel = (int)range->freq[i].i;
97                                 return 0;
98                         }
99                 }
100         }
101
102         *channel = -1;
103         return -1;
104 }
105
/*
 * Get the (first) pid of given process name.
 *
 * Walks the entries of /proc whose name starts with a digit and compares the
 * beginning of each /proc/<entry>/status file against "Name:\t<name>\n".
 * The calling process itself is never reported.
 *
 * name: process name to look for
 *
 * Returns the pid of the first match in directory order, or -1 when no other
 * process matches or /proc cannot be opened (the latter is logged).
 */
static int find_process(const char *name)
{
	int pid = -1;
	int file;
	ssize_t len;
	size_t cmplen;
	char buffer[128];
	char cmpname[128];
	DIR *dir;
	struct dirent *entry;

	if( (dir = opendir("/proc")) == NULL )
	{
		syslog(LOG_CRIT, "Unable to open /proc: %s",
			strerror(errno));

		return -1;
	}

	snprintf(cmpname, sizeof(cmpname), "Name:\t%s\n", name);
	cmplen = strlen(cmpname);

	while( (entry = readdir(dir)) != NULL )
	{
		/* Digit check also rejects "." and ".."; cast keeps isdigit() defined */
		if( !isdigit((unsigned char)*entry->d_name) )
			continue;

		snprintf(buffer, sizeof(buffer), "/proc/%s/status", entry->d_name);
		if( (file = open(buffer, O_RDONLY)) < 0 )
			continue;

		/* read() adds no terminator, so keep one byte free for it */
		len = read(file, buffer, sizeof(buffer) - 1);
		close(file);

		if( len <= 0 )
			continue;

		buffer[len] = '\0';

		/* The name line has to be the very first thing in the file */
		if( strncmp(buffer, cmpname, cmplen) == 0 )
		{
			pid = atoi(entry->d_name);

			/* Skip myself ... */
			if( pid == getpid() )
				pid = -1;
			else
				break;
		}
	}

	closedir(dir);
	return pid;
}
153
/*
 * Get the 5 minute load average.
 *
 * Reads the start of /proc/loadavg and parses the second
 * whitespace-separated field, independent of how wide the first field is.
 *
 * Returns the parsed value, or 0.00 when the file cannot be opened, read
 * or parsed.
 */
static double find_loadavg(void)
{
	int fd;
	ssize_t len;
	char buffer[64];
	double load = 0.00;

	if( (fd = open("/proc/loadavg", O_RDONLY)) > -1 )
	{
		/* read() adds no terminator, so keep one byte free for it */
		len = read(fd, buffer, sizeof(buffer) - 1);
		close(fd);

		if( len > 0 )
		{
			buffer[len] = '\0';

			/* Skip the first field whatever its length, take the second */
			if( sscanf(buffer, "%*s %lf", &load) != 1 )
				load = 0.00;
		}
	}

	return load;
}
171
/*
 * Check if given uci file was updated.
 *
 * Stats /var/state/<config> and compares its modification time against the
 * value remembered in *mtime.
 *
 * config: file name below /var/state
 * mtime:  in/out; last seen modification time, 0 meaning "never seen".
 *         Updated whenever 1 is returned.
 *
 * Returns 1 if *mtime was 0 or the file is newer than *mtime, 0 otherwise
 * (including when the file cannot be stat'ed).
 */
static int check_uci_update(const char *config, time_t *mtime)
{
	char statefile[128];
	struct stat info;

	snprintf(statefile, sizeof(statefile), "/var/state/%s", config);

	if( stat(statefile, &info) < 0 )
		return 0;

	/* Seen before and not newer: nothing to do */
	if( (*mtime != 0) && (info.st_mtime <= *mtime) )
		return 0;

	*mtime = info.st_mtime;
	return 1;
}
190
191 /* Add tuple */
192 static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr)
193 {
194         wifi_tuple_t *t;
195         const char *ucitmp;
196         int val = 0;
197
198         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "mode");
199         if( ucitmp && !strncmp(ucitmp, "adhoc", 5) )
200         {
201                 if( (t = (wifi_tuple_t *)malloc(sizeof(wifi_tuple_t))) != NULL )
202                 {
203                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "ifname");
204                         if(ucitmp)
205                         {
206                                 strncpy(t->ifname, ucitmp, sizeof(t->ifname));
207                                 val++;
208                         }
209
210                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "bssid");
211                         if(ucitmp)
212                         {
213                                 strncpy(t->bssid, ucitmp, sizeof(t->bssid));
214                                 val++;
215                         }
216
217                         ucitmp = ucix_get_option(itr->ctx, "wireless", section, "device");
218                         if(ucitmp)
219                         {
220                                 ucitmp = ucix_get_option(itr->ctx, "wireless", ucitmp, "channel");
221                                 if(ucitmp)
222                                 {
223                                         t->channel = atoi(ucitmp);
224                                         val++;
225                                 }
226                         }
227
228                         if( val == 3 )
229                         {
230                                 syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
231                                         t->ifname, t->bssid, t->channel);
232
233                                 t->next = itr->list;
234                                 itr->list = t;
235                         }
236                         else
237                         {
238                                 free(t);
239                         }
240                 }
241         }
242 }
243
244 /* Load config */
245 static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
246 {
247         struct uci_context *ctx;
248         struct uci_itr_ctx itr;
249         wifi_tuple_t *cur, *next;
250
251         if( check_uci_update("wireless", modtime) )
252         {
253                 syslog(LOG_INFO, "Config changed, reloading");
254
255                 if( (ctx = ucix_init("wireless")) != NULL )
256                 {
257                         if( ifs != NULL )
258                         {
259                                 for(cur = ifs; cur; cur = next)
260                                 {
261                                         next = cur->next;
262                                         free(cur);
263                                 }
264                         }
265
266                         itr.list = NULL;
267                         itr.ctx = ctx;
268
269                         ucix_for_each_section_type(ctx, "wireless", "wifi-iface",
270                                 (void *)load_wifi_uci_add_iface, &itr);
271
272                         return itr.list;
273                 }
274         }
275
276         return ifs;
277 }
278
279 /* Daemon implementation */
280 static int do_daemon(void)
281 {
282         static int wdtrigger = 1;
283         static int wdtimeout = BASE_INTERVAL * 2;
284         static const char wdkeepalive = WATCH_KEEPALIVE;
285
286         int iwfd;
287         int channel;
288         char bssid[18];
289         struct sigaction sa;
290
291         wifi_tuple_t *ifs = NULL, *curif;
292         time_t modtime = 0;
293
294         int action_intv = 0;
295         int restart_wifi = 0;
296         int restart_cron = 0;
297         int restart_sshd = 0;
298         int loadavg_panic = 0;
299
300         openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
301         //daemon(1, 1);
302
303         if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
304         {
305                 syslog(LOG_ERR, "Can not open wireless control socket: %s",
306                         strerror(errno));
307
308                 return 1;
309         }
310
311         if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
312         {
313                 syslog(LOG_INFO, "Opened %s - polling every %i seconds",
314                         WATCH_DEVICE, BASE_INTERVAL);
315
316                 /* Install signal handler to halt watchdog on shutdown */
317                 sa.sa_handler = shutdown_watchdog;
318                 sa.sa_flags = SA_NOCLDWAIT | SA_RESTART;
319                 sigaction(SIGHUP,  &sa, NULL);
320                 sigaction(SIGINT,  &sa, NULL);
321                 sigaction(SIGPIPE, &sa, NULL);
322                 sigaction(SIGTERM, &sa, NULL);
323                 sigaction(SIGUSR1, &sa, NULL);
324                 sigaction(SIGUSR2, &sa, NULL);
325
326                 /* Set watchdog timeout to twice the interval */
327                 ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtimeout);
328         }
329
330         while( 1 )
331         {
332                 /* Check/increment action interval */
333                 if( ++action_intv >= ACTION_INTERVAL )
334                 {
335                         /* Reset action interval */
336                         action_intv = 0;
337
338                         /* Check average load */
339                         if( find_loadavg() >= LOAD_TRESHOLD )
340                                 loadavg_panic++;
341                         else
342                                 loadavg_panic = 0;
343
344                         /* Check crond */
345                         if( find_process("crond") < 0 )
346                                 restart_cron++;
347                         else
348                                 restart_cron = 0;
349
350                         /* Check SSHd */
351                         if( find_process("dropbear") < 0 )
352                                 restart_sshd++;
353                         else
354                                 restart_sshd = 0;
355
356                         /* Check wireless interfaces */
357                         ifs = load_wifi_uci(ifs, &modtime);
358                         for( curif = ifs; curif; curif = curif->next )
359                         {
360                                 /* Get current channel and bssid */
361                                 if( (iw_get_bssid(iwfd, curif->ifname, bssid) == 0) &&
362                             (iw_get_channel(iwfd, curif->ifname, &channel) == 0) )
363                                 {
364                                         /* Check BSSID */
365                                         if( strcasecmp(bssid, curif->bssid) != 0 )
366                                         {
367                                                 syslog(LOG_WARNING, "BSSID mismatch on %s: current=%s wanted=%s",
368                                                         curif->ifname, bssid, curif->bssid);
369
370                                                 restart_wifi++;
371                                         }
372
373                                         /* Check channel */
374                                         else if( channel != curif->channel )
375                                         {
376                                                 syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
377                                                         curif->ifname, channel, curif->channel);
378
379                                                 restart_wifi++;
380                                         }
381                                 }
382                                 else
383                                 {
384                                         syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname);
385                                 }
386                         }
387
388
389                         /* Wifi restart required? */
390                         if( restart_wifi >= HYSTERESIS )
391                         {
392                                 restart_wifi = 0;
393                                 syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
394                                 EXEC(WIFI_ACTION);
395                         }
396
397                         /* Cron restart required? */
398                         if( restart_cron >= HYSTERESIS )
399                         {
400                                 restart_cron = 0;
401                                 syslog(LOG_WARNING, "The cron process died, restarting");
402                                 EXEC(CRON_ACTION);
403                         }
404
405                         /* SSHd restart required? */
406                         if( restart_sshd >= HYSTERESIS )
407                         {
408                                 restart_sshd = 0;
409                                 syslog(LOG_WARNING, "The ssh process died, restarting");
410                                 EXEC(SSHD_ACTION);
411                         }
412
413                         /* Is there a load problem? */
414                         if( loadavg_panic >= HYSTERESIS )
415                         {
416                                 syslog(LOG_EMERG, "Critical system load level, triggering reset!");
417
418                                 /* Try watchdog, fall back to reboot */
419                                 if( wdfd > -1 )
420                                         ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger);
421                                 else
422                                         EXEC(LOAD_ACTION);
423                         }
424                 }
425
426
427                 /* Reset watchdog timer */
428                 if( wdfd > -1 )
429                         write(wdfd, &wdkeepalive, 1);
430
431                 sleep(BASE_INTERVAL);
432         }
433
434         shutdown_watchdog(0);
435         closelog();
436
437         return 0;
438 }
439
440
441 int main(int argc, char *argv[])
442 {
443         /* Check if watchdog is running ... */
444         if( (argc > 1) && (strcmp(argv[1], "running") == 0) )
445         {
446                 return (find_process(BINARY) == -1);
447         }
448
449         /* Start daemon */
450         return do_daemon();
451 }