app/monitoring.py
2021-10-12 15:03:16 +02:00

64 lines
1.6 KiB
Python

import os
from time import sleep
from app.config import HOST
from app.db import Session
from app.log import LOG
from app.models import Monitoring
# Number of consecutive get_stats() checks in which the postfix incoming queue
# was above _max_incoming.
# get_stats() logs an error once this count exceeds _max_nb_fails and then
# resets it; it is also reset to 0 whenever a check finds the incoming queue
# back at or below the limit.
# NOTE(review): get_stats() compares only incoming_queue against
# _max_incoming, not incoming_queue + active_queue, although the error message
# mentions both queues — confirm which is intended.
_nb_failed = 0
_max_nb_fails = 10
# Largest number of files tolerated in the postfix incoming queue before a
# check is counted as failed.
_max_incoming = 50
def get_stats():
    """Sample the postfix queue sizes, persist them and alert on a lasting backlog.

    The number of files under the postfix incoming, active and deferred spool
    directories is counted, logged at debug level, stored through
    ``Monitoring.create`` for ``HOST`` and committed on ``Session``.

    The module-level ``_nb_failed`` counter is then updated:

    * incoming queue at or below ``_max_incoming``: the counter is reset to 0;
    * incoming queue above ``_max_incoming``: the counter is incremented, and
      once it exceeds ``_max_nb_fails`` it is reset to 0 and an error is
      logged with the incoming and active queue sizes.
    """
    global _nb_failed

    incoming = nb_files("/var/spool/postfix/incoming")
    active = nb_files("/var/spool/postfix/active")
    deferred = nb_files("/var/spool/postfix/deferred")

    LOG.d("postfix queue sizes %s %s %s", incoming, active, deferred)

    Monitoring.create(
        host=HOST,
        incoming_queue=incoming,
        active_queue=active,
        deferred_queue=deferred,
    )
    Session.commit()

    # NOTE(review): only the incoming queue is compared with the threshold,
    # even though the error message below mentions the active queue as well.
    if incoming <= _max_incoming:
        # Back to normal: forget any previous consecutive failures.
        _nb_failed = 0
        return

    _nb_failed += 1
    if _nb_failed <= _max_nb_fails:
        # Not enough consecutive failures yet to raise an alert.
        return

    # Start a fresh count so the alert is not repeated on every later check.
    _nb_failed = 0
    LOG.e(
        "Too many emails in incoming & active queue %s %s",
        incoming,
        active,
    )
def nb_files(directory) -> int:
    """Count the files located in ``directory`` or anywhere below it.

    The tree is traversed with ``os.walk``; only file entries are counted,
    directories themselves are not. With ``os.walk``'s default error handling
    a directory that cannot be listed (for example a missing one) contributes
    nothing, so the result is 0 in that case.
    """
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(directory):
        total += len(filenames)
    return total
if __name__ == "__main__":
    # Run as a script: sample the postfix queues forever, pausing between samples.
    poll_interval_seconds = 60  # 1 min
    while True:
        get_stats()
        sleep(poll_interval_seconds)