Browse Source

Make all monit network checks consistent: every check now uses `with timeout 10 seconds` and `with retry 2`, and restarts the service on failure.

Thomas Buck 1 year ago
parent
commit
89c73fc931
26 changed files with 80 additions and 20 deletions
  1. 1
    0
      roles/monitoring/files/etc_monit_conf.d_apache2
  2. 1
    0
      roles/monitoring/files/etc_monit_conf.d_commento
  3. 5
    2
      roles/monitoring/files/etc_monit_conf.d_dnsmasq
  4. 9
    3
      roles/monitoring/files/etc_monit_conf.d_dovecot
  5. 1
    0
      roles/monitoring/files/etc_monit_conf.d_fathom
  6. 1
    0
      roles/monitoring/files/etc_monit_conf.d_gitea
  7. 4
    1
      roles/monitoring/files/etc_monit_conf.d_grafana
  8. 1
    0
      roles/monitoring/files/etc_monit_conf.d_influxdb
  9. 2
    0
      roles/monitoring/files/etc_monit_conf.d_mastodon
  10. 1
    0
      roles/monitoring/files/etc_monit_conf.d_matrix
  11. 1
    0
      roles/monitoring/files/etc_monit_conf.d_mongodb
  12. 12
    3
      roles/monitoring/files/etc_monit_conf.d_mosquitto
  13. 5
    2
      roles/monitoring/files/etc_monit_conf.d_openvpn
  14. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb10
  15. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb11
  16. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb9
  17. 4
    1
      roles/monitoring/files/etc_monit_conf.d_postfix
  18. 1
    0
      roles/monitoring/files/etc_monit_conf.d_prosody
  19. 1
    0
      roles/monitoring/files/etc_monit_conf.d_redis
  20. 1
    0
      roles/monitoring/files/etc_monit_conf.d_rocketchat
  21. 1
    0
      roles/monitoring/files/etc_monit_conf.d_rspamd
  22. 1
    0
      roles/monitoring/files/etc_monit_conf.d_slapd
  23. 4
    1
      roles/monitoring/files/etc_monit_conf.d_sshd
  24. 5
    2
      roles/monitoring/files/etc_monit_conf.d_tomcat_deb10
  25. 5
    2
      roles/monitoring/files/etc_monit_conf.d_tomcat_deb9
  26. 1
    0
      roles/monitoring/files/etc_monit_conf.d_znc

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_apache2 View File

@@ -4,5 +4,6 @@ check process apache2 with pidfile /var/run/apache2/apache2.pid
4 4
   stop program = "/bin/systemctl stop apache2"
5 5
   if failed host localhost port 80 protocol http
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_commento View File

@@ -4,5 +4,6 @@ check process commento matching "commento"
4 4
   stop program = "/bin/systemctl stop commento"
5 5
   if failed port 9100 protocol http
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_dnsmasq View File

@@ -2,5 +2,8 @@ check process dnsmasq with pidfile "/run/dnsmasq/dnsmasq.pid"
2 2
   group system
3 3
   start program = "/bin/systemctl start dnsmasq"
4 4
   stop program = "/bin/systemctl stop dnsmasq"
5
-  if failed port 53 type udp protocol dns then alert
6
-  if failed port 53 type udp protocol dns for 5 cycles then restart
5
+  if failed port 53 type udp protocol dns
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 9
- 3
roles/monitoring/files/etc_monit_conf.d_dovecot View File

@@ -2,6 +2,12 @@ check process dovecot with pidfile /var/run/dovecot/master.pid
2 2
   group mail
3 3
   start program = "/bin/systemctl start dovecot"
4 4
   stop program = "/bin/systemctl stop dovecot"
5
-  if failed port 993 type tcpssl sslauto protocol imap for 5 cycles then restart
6
-  if failed port 995 type tcpssl sslauto protocol pop for 5 cycles then restart
7
-  if 3 restarts within 5 cycles then timeout
5
+  if failed port 993 type tcpssl sslauto protocol imap
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if failed port 995 type tcpssl sslauto protocol pop
10
+    with timeout 10 seconds
11
+    with retry 2
12
+    then restart
13
+  if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_fathom View File

@@ -4,5 +4,6 @@ check process fathom matching fathom
4 4
   stop program = "/bin/systemctl stop fathom-stats"
5 5
   if failed port 9000 protocol http
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_gitea View File

@@ -4,5 +4,6 @@ check process gitea matching gitea
4 4
   stop program = "/bin/systemctl stop gitea"
5 5
   if failed port 3000 protocol http
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_grafana View File

@@ -2,5 +2,8 @@ check process grafana with pidfile /var/run/grafana/grafana-server.pid
2 2
   group iot
3 3
   start program = "/bin/systemctl start grafana-server"
4 4
   stop program = "/bin/systemctl stop grafana-server"
5
-  if failed host localhost port 2942 protocol http then restart
5
+  if failed port 2942 protocol http
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_influxdb View File

@@ -4,5 +4,6 @@ check process influxdb matching "influxd"
4 4
   stop program = "/bin/systemctl stop influxdb"
5 5
   if failed port 8086 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 2
- 0
roles/monitoring/files/etc_monit_conf.d_mastodon View File

@@ -4,6 +4,7 @@ check process mastodon-web matching "puma [0-9.]* \(tcp://0.0.0.0:4220\) \[masto
4 4
   stop program = "/bin/systemctl stop mastodon-web"
5 5
   if failed port 4220 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout
9 10
 
@@ -13,6 +14,7 @@ check process mastodon-streaming matching "/home/mastodon/mastodon/streaming/ind
13 14
   stop program = "/bin/systemctl stop mastodon-streaming"
14 15
   if failed port 4210 type tcp
15 16
     with timeout 10 seconds
17
+    with retry 2
16 18
     then restart
17 19
   if 5 restarts within 5 cycles then timeout
18 20
 

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_matrix View File

@@ -4,5 +4,6 @@ check process synapse matching /opt/venvs/matrix-synapse/bin/python
4 4
   stop program = "/bin/systemctl stop matrix-synapse"
5 5
   if failed port 8008 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_mongodb View File

@@ -4,5 +4,6 @@ check process mongodb matching "mongod"
4 4
   stop program = "/bin/systemctl stop mongod"
5 5
   if failed port 27017 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 12
- 3
roles/monitoring/files/etc_monit_conf.d_mosquitto View File

@@ -2,7 +2,16 @@ check process mosquitto matching mosquitto
2 2
   group iot
3 3
   start program = "/bin/systemctl start mosquitto"
4 4
   stop program = "/bin/systemctl stop mosquitto"
5
-  if failed host localhost port 1883 type tcp then restart
6
-  if failed host localhost port 8883 type tcp then restart
7
-  if failed host localhost port 8083 type tcp then restart
5
+  if failed port 1883 type tcp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if failed port 8883 type tcp
10
+    with timeout 10 seconds
11
+    with retry 2
12
+    then restart
13
+  if failed port 8083 type tcp
14
+    with timeout 10 seconds
15
+    with retry 2
16
+    then restart
8 17
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_openvpn View File

@@ -2,5 +2,8 @@ check process openvpn with pidfile "/run/openvpn/server.pid"
2 2
   group system
3 3
   start program = "/bin/systemctl start openvpn@server"
4 4
   stop program = "/bin/systemctl stop openvpn@server"
5
-  if failed port 1194 type udp then alert
6
-  if failed port 1194 type udp for 5 cycles then restart
5
+  if failed port 1194 type udp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb10 View File

@@ -2,5 +2,8 @@ check process postgres with pidfile /var/run/postgresql/11-main.pid
2 2
   group database
3 3
   start program = "/bin/systemctl start postgresql"
4 4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb11 View File

@@ -2,5 +2,8 @@ check process postgres with pidfile /var/run/postgresql/13-main.pid
2 2
   group database
3 3
   start program = "/bin/systemctl start postgresql"
4 4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb9 View File

@@ -2,5 +2,8 @@ check process postgres with pidfile /var/run/postgresql/9.6-main.pid
2 2
   group database
3 3
   start program = "/bin/systemctl start postgresql"
4 4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_postfix View File

@@ -2,5 +2,8 @@ check process postfix with pidfile /var/spool/postfix/pid/master.pid
2 2
   group mail
3 3
   start program = "/bin/systemctl start postfix"
4 4
   stop program = "/bin/systemctl stop postfix"
5
-  if failed port 25 protocol smtp then restart
5
+  if failed port 25 protocol smtp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_prosody View File

@@ -4,5 +4,6 @@ check process prosody with pidfile /var/run/prosody/prosody.pid
4 4
   stop program = "/bin/systemctl stop prosody"
5 5
   if failed port 5222 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_redis View File

@@ -4,5 +4,6 @@ check process redis with pidfile /var/run/redis/redis-server.pid
4 4
   stop program = "/bin/systemctl stop redis"
5 5
   if failed port 6379 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_rocketchat View File

@@ -4,5 +4,6 @@ check process rocketchat matching "Rocket.Chat"
4 4
   stop program = "/bin/systemctl stop rocketchat"
5 5
   if failed port 3042 protocol http
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_rspamd View File

@@ -4,5 +4,6 @@ check process rspamd matching rspamd
4 4
   stop program = "/bin/systemctl stop rspamd"
5 5
   if failed port 11333 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_slapd View File

@@ -4,5 +4,6 @@ check process slapd with pidfile /var/run/slapd/slapd.pid
4 4
   stop program = "/bin/systemctl stop slapd"
5 5
   if failed port 389 protocol LDAP3
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_sshd View File

@@ -2,5 +2,8 @@ check process sshd with pidfile /var/run/sshd.pid
2 2
   group system
3 3
   start program = "/bin/systemctl start ssh"
4 4
   stop program = "/bin/systemctl stop ssh"
5
-  if failed host 127.0.0.1 port 22 protocol ssh then restart
5
+  if failed port 22 protocol ssh
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6 9
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_tomcat_deb10 View File

@@ -2,5 +2,8 @@ check process tomcat matching tomcat9
2 2
   group mail
3 3
   start program = "/bin/systemctl start tomcat9"
4 4
   stop program = "/bin/systemctl stop tomcat9"
5
-  if failed port 8080 then alert
6
-  if failed port 8080 for 5 cycles then restart
5
+  if failed port 8080
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_tomcat_deb9 View File

@@ -2,5 +2,8 @@ check process tomcat with pidfile "/var/run/tomcat8.pid"
2 2
   group mail
3 3
   start program = "/bin/systemctl start tomcat8"
4 4
   stop program = "/bin/systemctl stop tomcat8"
5
-  if failed port 8080 then alert
6
-  if failed port 8080 for 5 cycles then restart
5
+  if failed port 8080
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_znc View File

@@ -4,5 +4,6 @@ check process znc with pidfile /var/run/znc/znc.pid
4 4
   stop program = "/bin/systemctl stop znc"
5 5
   if failed port 6643 type tcp
6 6
     with timeout 10 seconds
7
+    with retry 2
7 8
     then restart
8 9
   if 5 restarts within 5 cycles then timeout

Loading…
Cancel
Save