contrib/package/freifunk-watchdog: workaround CONFIG_WATCHDOG_NOWAYOUT, trap signals
[project/luci.git] / contrib / package / freifunk-watchdog / src / watchdog.c
index aea0516..7bbb7c7 100644 (file)
 
 #include "watchdog.h"
 
+/* Global watchdog fd, required by signal handler */
+int wdfd = -1;
+
+/* Watchdog shutdown helper */
+static void shutdown_watchdog(int sig)
+{
+       static int wdelay = 3600;
+       static const char wshutdown = WATCH_SHUTDOWN;
+
+       if( wdfd > -1 )
+       {
+               syslog(LOG_INFO, "Stopping watchdog timer");
+               write(wdfd, &wshutdown, 1);
+
+               /* Older Kamikaze versions are compiled with
+                * CONFIG_WATCHDOG_NOWAYOUT=y, this can be
+                * harmful if we're in the middle of an upgrade.
+                * Increase the watchdog timeout to 3600 seconds
+                * here to avoid unplanned reboots. */
+               ioctl(wdfd, WDIOC_SETTIMEOUT, &wdelay);
+
+               close(wdfd);
+               wdfd = -1;
+       }
+
+       exit(0);
+}
+
 /* Get BSSID of given interface */
 static int iw_get_bssid(int iwfd, const char *ifname, char *bssid)
 {
@@ -69,7 +97,7 @@ static int iw_get_channel(int iwfd, const char *ifname, int *channel)
                }
 
                for(i = 0; i < range->num_frequency; i++)
-           {
+               {
                        cmp_freq = ((double)range->freq[i].m) * pow(10, range->freq[i].e);
                        if( cmp_freq == cur_freq )
                        {
@@ -111,7 +139,12 @@ static int find_process(const char *name)
                                if( strstr(buffer, cmpname) == buffer )
                                {
                                        pid = atoi(entry->d_name);
-                                       break;
+
+                                       /* Skip myself ... */
+                                       if( pid == getpid() )
+                                               pid = -1;
+                                       else
+                                               break;
                                }
                        }
                }
@@ -126,13 +159,31 @@ static int find_process(const char *name)
        return -1;
 }
 
+/* Get the 5 minute load average */
+static double find_loadavg(void)
+{
+       int fd;
+       char buffer[10];
+       double load = 0.00;
+
+       if( (fd = open("/proc/loadavg", O_RDONLY)) > -1 )
+       {
+               if( read(fd, buffer, sizeof(buffer)) == sizeof(buffer) )
+                       load = atof(&buffer[5]);
+
+               close(fd);
+       }
+
+       return load;
+}
+
 /* Check if given uci file was updated */
 static int check_uci_update(const char *config, time_t *mtime)
 {
        struct stat s;
        char path[128];
 
-       snprintf(path, sizeof(path), "/etc/config/%s", config);
+       snprintf(path, sizeof(path), "/var/state/%s", config);
        if( stat(path, &s) > -1 )
        {
                if( (*mtime == 0) || (s.st_mtime > *mtime) )
@@ -140,20 +191,8 @@ static int check_uci_update(const char *config, time_t *mtime)
                        *mtime = s.st_mtime;
                        return 1;
                }
-               else
-               {
-                       snprintf(path, sizeof(path), "/var/state/%s", config);
-                       if( stat(path, &s) > -1 )
-                       {
-                               if( (*mtime == 0) || (s.st_mtime > *mtime) )
-                               {
-                                       *mtime = s.st_mtime;
-                                       return 1;
-                               }
-                       }
 
-                       return 0;
-               }
+               return 0;
        }
 
        return -1;
@@ -192,7 +231,7 @@ static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr
                                val++;
                        }
                }
-       
+
                if( val == 3 )
                {
                        syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
@@ -246,41 +285,71 @@ static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
 /* Daemon implementation */
 static int do_daemon(void)
 {
+       static int wdtrigger = 1;
+       static int wdtimeout = INTERVAL * 2;
+       static const char wdkeepalive = WATCH_KEEPALIVE;
+
        int iwfd;
        int channel;
        char bssid[18];
+       struct sigaction sa;
 
        wifi_tuple_t *ifs = NULL, *curif;
        time_t modtime = 0;
 
        int restart_wifi = 0;
        int restart_cron = 0;
+       int restart_sshd = 0;
+       int loadavg_panic = 0;
 
        openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
        //daemon(1, 1);
 
        if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
        {
-               perror("Can not open wireless control socket");
+               syslog(LOG_ERR, "Can not open wireless control socket: %s",
+                       strerror(errno));
+
                return 1;
        }
 
+       if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
+       {
+               syslog(LOG_INFO, "Opened %s - polling every %i seconds",
+                       WATCH_DEVICE, INTERVAL);
+
+               /* Install signal handler to halt watchdog on shutdown */
+               sa.sa_handler = shutdown_watchdog;
+               sa.sa_flags = SA_NOCLDWAIT | SA_RESTART;
+               sigaction(SIGHUP,  &sa, NULL);
+               sigaction(SIGINT,  &sa, NULL);
+               sigaction(SIGPIPE, &sa, NULL);
+               sigaction(SIGTERM, &sa, NULL);
+               sigaction(SIGUSR1, &sa, NULL);
+               sigaction(SIGUSR2, &sa, NULL);
+
+               /* Set watchdog timeout to twice the interval */
+               ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtimeout);
+       }
+
        while( 1 )
        {
-               if( (ifs = load_wifi_uci(ifs, &modtime)) == NULL )
-               {
-                       printf("Can not load wireless uci. File corrupt?\n");
-                       return 1;
-               }
+               /* Check average load */
+               if( find_loadavg() >= LOAD_TRESHOLD )
+                       loadavg_panic++;
+               else
+                       loadavg_panic = 0;
 
                /* Check crond */
                if( find_process("crond") < 0 )
-               {
-                       syslog(LOG_WARNING, "The crond process died, restarting");
-                       restart_cron++;         
-               }
+                       restart_cron++;
+
+               /* Check SSHd */
+               if( find_process("dropbear") < 0 )
+                       restart_sshd++;
 
                /* Check wireless interfaces */
+               ifs = load_wifi_uci(ifs, &modtime);
                for( curif = ifs; curif; curif = curif->next )
                {
                        /* Get current channel and bssid */
@@ -297,7 +366,7 @@ static int do_daemon(void)
                                }
 
                                /* Check channel */
-                               if( channel != curif->channel )
+                               else if( channel != curif->channel )
                                {
                                        syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
                                                curif->ifname, channel, curif->channel);
@@ -307,29 +376,57 @@ static int do_daemon(void)
                        }
                        else
                        {
-                               syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname);                               
+                               syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname);
                        }
                }
 
 
                /* Wifi restart required? */
-               if( restart_wifi > 0 )
+               if( restart_wifi >= HYSTERESIS )
                {
                        restart_wifi = 0;
+                       syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
                        EXEC(WIFI_ACTION);
                }
 
                /* Cron restart required? */
-               if( restart_cron > 0 )
+               if( restart_cron >= HYSTERESIS )
                {
                        restart_cron = 0;
-                       EXEC(CRON_ACTION);      
+                       syslog(LOG_WARNING, "The cron process died, restarting");
+                       EXEC(CRON_ACTION);
+               }
+
+               /* SSHd restart required? */
+               if( restart_sshd >= HYSTERESIS )
+               {
+                       restart_sshd = 0;
+                       syslog(LOG_WARNING, "The ssh process died, restarting");
+                       EXEC(SSHD_ACTION);
+               }
+
+               /* Is there a load problem? */
+               if( loadavg_panic >= HYSTERESIS )
+               {
+                       syslog(LOG_EMERG, "Critical system load level, triggering reset!");
+
+                       /* Try watchdog, fall back to reboot */
+                       if( wdfd > -1 )
+                               ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger);
+                       else
+                               EXEC(LOAD_ACTION);
                }
 
+               /* Reset watchdog timer */
+               if( wdfd > -1 )
+                       write(wdfd, &wdkeepalive, 1);
+
                sleep(INTERVAL);
        }
 
+       shutdown_watchdog(0);
        closelog();
+
        return 0;
 }
 
@@ -339,7 +436,7 @@ int main(int argc, char *argv[])
        /* Check if watchdog is running ... */
        if( (argc > 1) && (strcmp(argv[1], "running") == 0) )
        {
-               return (find_process(BINARY) >= 0);
+               return (find_process(BINARY) == -1);
        }
 
        /* Start daemon */