Browse Source

Make all monit network checks consistent: each one now uses a 10-second timeout and 2 retries before restarting the service.

Thomas Buck 2 years ago
parent
commit
89c73fc931
26 changed files with 80 additions and 20 deletions
  1. 1
    0
      roles/monitoring/files/etc_monit_conf.d_apache2
  2. 1
    0
      roles/monitoring/files/etc_monit_conf.d_commento
  3. 5
    2
      roles/monitoring/files/etc_monit_conf.d_dnsmasq
  4. 9
    3
      roles/monitoring/files/etc_monit_conf.d_dovecot
  5. 1
    0
      roles/monitoring/files/etc_monit_conf.d_fathom
  6. 1
    0
      roles/monitoring/files/etc_monit_conf.d_gitea
  7. 4
    1
      roles/monitoring/files/etc_monit_conf.d_grafana
  8. 1
    0
      roles/monitoring/files/etc_monit_conf.d_influxdb
  9. 2
    0
      roles/monitoring/files/etc_monit_conf.d_mastodon
  10. 1
    0
      roles/monitoring/files/etc_monit_conf.d_matrix
  11. 1
    0
      roles/monitoring/files/etc_monit_conf.d_mongodb
  12. 12
    3
      roles/monitoring/files/etc_monit_conf.d_mosquitto
  13. 5
    2
      roles/monitoring/files/etc_monit_conf.d_openvpn
  14. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb10
  15. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb11
  16. 4
    1
      roles/monitoring/files/etc_monit_conf.d_pgsql_deb9
  17. 4
    1
      roles/monitoring/files/etc_monit_conf.d_postfix
  18. 1
    0
      roles/monitoring/files/etc_monit_conf.d_prosody
  19. 1
    0
      roles/monitoring/files/etc_monit_conf.d_redis
  20. 1
    0
      roles/monitoring/files/etc_monit_conf.d_rocketchat
  21. 1
    0
      roles/monitoring/files/etc_monit_conf.d_rspamd
  22. 1
    0
      roles/monitoring/files/etc_monit_conf.d_slapd
  23. 4
    1
      roles/monitoring/files/etc_monit_conf.d_sshd
  24. 5
    2
      roles/monitoring/files/etc_monit_conf.d_tomcat_deb10
  25. 5
    2
      roles/monitoring/files/etc_monit_conf.d_tomcat_deb9
  26. 1
    0
      roles/monitoring/files/etc_monit_conf.d_znc

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_apache2 View File

4
   stop program = "/bin/systemctl stop apache2"
4
   stop program = "/bin/systemctl stop apache2"
5
   if failed host localhost port 80 protocol http
5
   if failed host localhost port 80 protocol http
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_commento View File

4
   stop program = "/bin/systemctl stop commento"
4
   stop program = "/bin/systemctl stop commento"
5
   if failed port 9100 protocol http
5
   if failed port 9100 protocol http
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_dnsmasq View File

2
   group system
2
   group system
3
   start program = "/bin/systemctl start dnsmasq"
3
   start program = "/bin/systemctl start dnsmasq"
4
   stop program = "/bin/systemctl stop dnsmasq"
4
   stop program = "/bin/systemctl stop dnsmasq"
5
-  if failed port 53 type udp protocol dns then alert
6
-  if failed port 53 type udp protocol dns for 5 cycles then restart
5
+  if failed port 53 type udp protocol dns
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 9
- 3
roles/monitoring/files/etc_monit_conf.d_dovecot View File

2
   group mail
2
   group mail
3
   start program = "/bin/systemctl start dovecot"
3
   start program = "/bin/systemctl start dovecot"
4
   stop program = "/bin/systemctl stop dovecot"
4
   stop program = "/bin/systemctl stop dovecot"
5
-  if failed port 993 type tcpssl sslauto protocol imap for 5 cycles then restart
6
-  if failed port 995 type tcpssl sslauto protocol pop for 5 cycles then restart
7
-  if 3 restarts within 5 cycles then timeout
5
+  if failed port 993 type tcpssl sslauto protocol imap
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if failed port 995 type tcpssl sslauto protocol pop
10
+    with timeout 10 seconds
11
+    with retry 2
12
+    then restart
13
+  if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_fathom View File

4
   stop program = "/bin/systemctl stop fathom-stats"
4
   stop program = "/bin/systemctl stop fathom-stats"
5
   if failed port 9000 protocol http
5
   if failed port 9000 protocol http
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_gitea View File

4
   stop program = "/bin/systemctl stop gitea"
4
   stop program = "/bin/systemctl stop gitea"
5
   if failed port 3000 protocol http
5
   if failed port 3000 protocol http
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_grafana View File

2
   group iot
2
   group iot
3
   start program = "/bin/systemctl start grafana-server"
3
   start program = "/bin/systemctl start grafana-server"
4
   stop program = "/bin/systemctl stop grafana-server"
4
   stop program = "/bin/systemctl stop grafana-server"
5
-  if failed host localhost port 2942 protocol http then restart
5
+  if failed port 2942 protocol http
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_influxdb View File

4
   stop program = "/bin/systemctl stop influxdb"
4
   stop program = "/bin/systemctl stop influxdb"
5
   if failed port 8086 type tcp
5
   if failed port 8086 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 2
- 0
roles/monitoring/files/etc_monit_conf.d_mastodon View File

4
   stop program = "/bin/systemctl stop mastodon-web"
4
   stop program = "/bin/systemctl stop mastodon-web"
5
   if failed port 4220 type tcp
5
   if failed port 4220 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout
9
 
10
 
13
   stop program = "/bin/systemctl stop mastodon-streaming"
14
   stop program = "/bin/systemctl stop mastodon-streaming"
14
   if failed port 4210 type tcp
15
   if failed port 4210 type tcp
15
     with timeout 10 seconds
16
     with timeout 10 seconds
17
+    with retry 2
16
     then restart
18
     then restart
17
   if 5 restarts within 5 cycles then timeout
19
   if 5 restarts within 5 cycles then timeout
18
 
20
 

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_matrix View File

4
   stop program = "/bin/systemctl stop matrix-synapse"
4
   stop program = "/bin/systemctl stop matrix-synapse"
5
   if failed port 8008 type tcp
5
   if failed port 8008 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_mongodb View File

4
   stop program = "/bin/systemctl stop mongod"
4
   stop program = "/bin/systemctl stop mongod"
5
   if failed port 27017 type tcp
5
   if failed port 27017 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 12
- 3
roles/monitoring/files/etc_monit_conf.d_mosquitto View File

2
   group iot
2
   group iot
3
   start program = "/bin/systemctl start mosquitto"
3
   start program = "/bin/systemctl start mosquitto"
4
   stop program = "/bin/systemctl stop mosquitto"
4
   stop program = "/bin/systemctl stop mosquitto"
5
-  if failed host localhost port 1883 type tcp then restart
6
-  if failed host localhost port 8883 type tcp then restart
7
-  if failed host localhost port 8083 type tcp then restart
5
+  if failed port 1883 type tcp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if failed port 8883 type tcp
10
+    with timeout 10 seconds
11
+    with retry 2
12
+    then restart
13
+  if failed port 8083 type tcp
14
+    with timeout 10 seconds
15
+    with retry 2
16
+    then restart
8
   if 5 restarts within 5 cycles then timeout
17
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_openvpn View File

2
   group system
2
   group system
3
   start program = "/bin/systemctl start openvpn@server"
3
   start program = "/bin/systemctl start openvpn@server"
4
   stop program = "/bin/systemctl stop openvpn@server"
4
   stop program = "/bin/systemctl stop openvpn@server"
5
-  if failed port 1194 type udp then alert
6
-  if failed port 1194 type udp for 5 cycles then restart
5
+  if failed port 1194 type udp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb10 View File

2
   group database
2
   group database
3
   start program = "/bin/systemctl start postgresql"
3
   start program = "/bin/systemctl start postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb11 View File

2
   group database
2
   group database
3
   start program = "/bin/systemctl start postgresql"
3
   start program = "/bin/systemctl start postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_pgsql_deb9 View File

2
   group database
2
   group database
3
   start program = "/bin/systemctl start postgresql"
3
   start program = "/bin/systemctl start postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
4
   stop program = "/bin/systemctl stop postgresql"
5
-  if failed host localhost port 5432 protocol pgsql then restart
5
+  if failed port 5432 protocol pgsql
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_postfix View File

2
   group mail
2
   group mail
3
   start program = "/bin/systemctl start postfix"
3
   start program = "/bin/systemctl start postfix"
4
   stop program = "/bin/systemctl stop postfix"
4
   stop program = "/bin/systemctl stop postfix"
5
-  if failed port 25 protocol smtp then restart
5
+  if failed port 25 protocol smtp
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_prosody View File

4
   stop program = "/bin/systemctl stop prosody"
4
   stop program = "/bin/systemctl stop prosody"
5
   if failed port 5222 type tcp
5
   if failed port 5222 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_redis View File

4
   stop program = "/bin/systemctl stop redis"
4
   stop program = "/bin/systemctl stop redis"
5
   if failed port 6379 type tcp
5
   if failed port 6379 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_rocketchat View File

4
   stop program = "/bin/systemctl stop rocketchat"
4
   stop program = "/bin/systemctl stop rocketchat"
5
   if failed port 3042 protocol http
5
   if failed port 3042 protocol http
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_rspamd View File

4
   stop program = "/bin/systemctl stop rspamd"
4
   stop program = "/bin/systemctl stop rspamd"
5
   if failed port 11333 type tcp
5
   if failed port 11333 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_slapd View File

4
   stop program = "/bin/systemctl stop slapd"
4
   stop program = "/bin/systemctl stop slapd"
5
   if failed port 389 protocol LDAP3
5
   if failed port 389 protocol LDAP3
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 4
- 1
roles/monitoring/files/etc_monit_conf.d_sshd View File

2
   group system
2
   group system
3
   start program = "/bin/systemctl start ssh"
3
   start program = "/bin/systemctl start ssh"
4
   stop program = "/bin/systemctl stop ssh"
4
   stop program = "/bin/systemctl stop ssh"
5
-  if failed host 127.0.0.1 port 22 protocol ssh then restart
5
+  if failed port 22 protocol ssh
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
6
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_tomcat_deb10 View File

2
   group mail
2
   group mail
3
   start program = "/bin/systemctl start tomcat9"
3
   start program = "/bin/systemctl start tomcat9"
4
   stop program = "/bin/systemctl stop tomcat9"
4
   stop program = "/bin/systemctl stop tomcat9"
5
-  if failed port 8080 then alert
6
-  if failed port 8080 for 5 cycles then restart
5
+  if failed port 8080
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 5
- 2
roles/monitoring/files/etc_monit_conf.d_tomcat_deb9 View File

2
   group mail
2
   group mail
3
   start program = "/bin/systemctl start tomcat8"
3
   start program = "/bin/systemctl start tomcat8"
4
   stop program = "/bin/systemctl stop tomcat8"
4
   stop program = "/bin/systemctl stop tomcat8"
5
-  if failed port 8080 then alert
6
-  if failed port 8080 for 5 cycles then restart
5
+  if failed port 8080
6
+    with timeout 10 seconds
7
+    with retry 2
8
+    then restart
9
+  if 5 restarts within 5 cycles then timeout

+ 1
- 0
roles/monitoring/files/etc_monit_conf.d_znc View File

4
   stop program = "/bin/systemctl stop znc"
4
   stop program = "/bin/systemctl stop znc"
5
   if failed port 6643 type tcp
5
   if failed port 6643 type tcp
6
     with timeout 10 seconds
6
     with timeout 10 seconds
7
+    with retry 2
7
     then restart
8
     then restart
8
   if 5 restarts within 5 cycles then timeout
9
   if 5 restarts within 5 cycles then timeout

Loading…
Cancel
Save