Monit est un logiciel de monitoring dédié à la surveillance locale d’un serveur et de ses services.  Il peut effectuer des tests précis et effectuer des actions en retour, comme le redémarrage d’un logiciel si la charge CPU est trop importante. Bref, c’est simple et çà marche.

Je vous colle la configuration pour une installation LAMP sous Debian, regroupant les tests de disponibilité réseau SSH / Apache / MySQL, la vérification de l’espace disque, contrôle de la charge système et de l’utilisation CPU.

set logfile /var/log/monit.log
set mailserver localhost
set mail-format { from: monit@my.domain }
set alert admin@my.domain
set eventqueue
 basedir /var/monit
 slots 100

set httpd port 2812 and
 ssl enable
 pemfile /etc/monit/monit.pem
 allow admin:pa$$word

check system localhost
 if loadavg (1min) > 4 then alert
 if loadavg (5min) > 2 then alert
 if memory usage > 75% then alert
 if cpu usage (user) > 70% then alert
 if cpu usage (system) > 30% then alert
 if cpu usage (wait) > 20% then alert

check device rootfs with path /
 if space usage > 90% then alert

check directory bin with path /bin
 if failed permission 755 then unmonitor
 if failed uid 0 then unmonitor
 if failed gid 0 then unmonitor

check process cron with pidfile /var/run/crond.pid
 group system
 start program = "/etc/init.d/cron start"
 stop  program = "/etc/init.d/cron stop"
 if 5 restarts within 5 cycles then timeout
 depends on cron_rc
check file cron_rc with path /etc/init.d/cron
 group system
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor

check process syslog with pidfile /var/run/rsyslogd.pid
 group system
 start program = "/etc/init.d/rsyslog start"
 stop program = "/etc/init.d/rsyslog stop"
 if 5 restarts within 5 cycles then timeout
check file syslog_file with path /var/log/syslog

check process sshd with pidfile /var/run/sshd.pid
 group system
 start program  "/etc/init.d/ssh start"
 stop program  "/etc/init.d/ssh stop"
 if failed port 22 protocol ssh then restart
 if 5 restarts within 5 cycles then timeout
check file ssh_bin with path /usr/sbin/sshd
 group system
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor
check file ssh_rc with path /etc/init.d/ssh
 group system
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor

check process apache with pidfile /var/run/apache2.pid
 group web
 start program = "/etc/init.d/apache2 start"
 stop program  = "/etc/init.d/apache2 stop"
 if failed host 127.0.0.1 port 80 protocol http and request "/index.html" then restart
 if cpu is greater than 60% for 2 cycles then alert
 if cpu > 80% for 5 cycles then restart
 if children > 250 then restart
 if 3 restarts within 5 cycles then timeout
check file apache_bin with path /usr/sbin/apache2
 group apache
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor
check file apache_rc with path /etc/init.d/apache2
 group apache
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor

check process mysql with pidfile /var/run/mysqld/mysqld.pid
 group database
 start program = "/etc/init.d/mysql start"
 stop program = "/etc/init.d/mysql stop"
 if failed host 127.0.0.1 port 3306 protocol mysql then restart
 if 5 restarts within 5 cycles then timeout
 depends on mysql_bin
 depends on mysql_rc
check file mysql_bin with path /usr/sbin/mysqld
 group database
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor
check file mysql_rc with path /etc/init.d/mysql
 group database
 if failed checksum then unmonitor
 if failed permission 755 then unmonitor
 if failed uid root then unmonitor
 if failed gid root then unmonitor

Un petit screenshot pour présenter le résultat :