From f6ab1dc4ce0a65b07422ed708bbf687e814d62c7 Mon Sep 17 00:00:00 2001
From: Dirk Alders
Date: Wed, 20 Dec 2023 09:36:42 +0100
Subject: [PATCH] unknown state reduced to heartbeat monitoring timings and status

Every monitoring result is now returned through __nagios_return__().
A non-forced UNKNOWN is no longer reported immediately; it is mapped by
the time it has persisted for that monitoring name:

  * shorter than LAST_MSG_WARNING  -> OK
  * shorter than LAST_MSG_ERROR    -> WARNING
  * LAST_MSG_ERROR or longer       -> UNKNOWN

and " - since <hours>h" is appended to the message. Any other status, or
an UNKNOWN passed with force=True, resets the stored timestamp.

Further changes:

  * LAST_MSG_WARNING / LAST_MSG_ERROR shrink from 6 / 24 days to
    6 / 24 hours.
  * The monitoring keys become class constants (MONITORING_*).
  * The follow-setpoint WARNING message now converts the age to minutes,
    matching its "min" label and the ERROR message.
---
 z_server/devices/__init__.py | 67 +++++++++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 21 deletions(-)

diff --git a/z_server/devices/__init__.py b/z_server/devices/__init__.py
index 0b99e8d..bc8c413 100644
--- a/z_server/devices/__init__.py
+++ b/z_server/devices/__init__.py
@@ -12,6 +12,10 @@ logger = logging.getLogger(ROOT_LOGGER_NAME).getChild(__name__)
 
 
 class base(object):
+    MONITORING_HEARTBEAT = "heartbeat"
+    MONITORING_BATTERY = "battery"
+    MONITORING_FOLLOW_SETPOINT = "follow_setpoint"
+    #
     FOLLOW_REQUEST_WARNING = 5  # Seconds, till warning comes up, if device does not follow the command
     FOLLOW_REQUEST_ERROR = 60  # Seconds, till error comes up, if device does not follow the command
     FOLLOW_KEY = None
@@ -19,12 +23,14 @@ class base(object):
     BATTERY_LVL_WARNING = 15
     BATTERY_LVL_ERROR = 5
     #
-    LAST_MSG_WARNING = 6 * 24 * 60 * 60
-    LAST_MSG_ERROR = 24 * 24 * 60 * 60
+    LAST_MSG_WARNING = 6 * 60 * 60
+    LAST_MSG_ERROR = 24 * 60 * 60
 
     def __init__(self, mqtt_client: mqtt.mqtt_client, topic):
         self.topic = topic
         #
+        self.__unknown_tm__ = {}
+        #
         mqtt_client.add_callback(topic, self.__rx__)
         mqtt_client.add_callback(topic + '/#', self.__rx__)
         #
@@ -58,8 +64,8 @@ class base(object):
         #
         # battery level
         #
-        if "battery" in payload and message.topic == self.topic:
-            self.battery = payload["battery"]
+        if self.MONITORING_BATTERY in payload and message.topic == self.topic:
+            self.battery = payload[self.MONITORING_BATTERY]
 
     def target(self, key, value):
         tm_t, value_t = self.__target_storage__.get(key, (0, None))
@@ -75,51 +81,70 @@ class base(object):
         #
         # HEARTBEAT
         #
-        if key == "heartbeat":
+        if key == self.MONITORING_HEARTBEAT:
             if self.last_device_msg is None:
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received or unknown monitoring"}
+                return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.UNKNOWN, "Device exists, but no data received")
             else:
                 dt = time.time() - self.last_device_msg
                 dt_disp = dt / 60 / 60
                 if dt > self.LAST_MSG_ERROR:
-                    return {"status": nagios.Nagios.ERROR, "msg": "Last message %.1fh ago" % dt_disp}
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.ERROR, "Last message %.1fh ago" % dt_disp)
                 elif dt > self.LAST_MSG_WARNING:
-                    return {"status": nagios.Nagios.WARNING, "msg": "Last message %.1fh ago" % dt_disp}
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.WARNING, "Last message %.1fh ago" % dt_disp)
                 else:
-                    return {"status": nagios.Nagios.OK, "msg": "Last message %.1fh ago" % dt_disp}
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.OK, "Last message %.1fh ago" % dt_disp)
         #
         # FOLLOW SETPOINT
         #
-        elif key == 'follow_setpoint':
+        elif key == self.MONITORING_FOLLOW_SETPOINT:
             if self.FOLLOW_KEY is None:
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exist, but does not follow any setpoint."}
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.UNKNOWN, "Device exist, but does not follow any setpoint.", force=True)
             tm_s, value_s = self.__state_storage__.get(self.FOLLOW_KEY, (0, None))
             try:
                 tm_t, value_t = self.__target_storage__[self.FOLLOW_KEY]
             except KeyError:
                 if value_s is not None:
-                    return {"status": nagios.Nagios.WARNING, "msg": "Current temperature setpoint %.1f°C (age=%.1fmin), but never received a setpoint" % (value_s, (time.time()-tm_s)/60)}
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received"}
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.WARNING, "Current temperature setpoint %.1f°C (age=%.1fmin), but never received a setpoint" % (value_s, (time.time()-tm_s)/60))
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.UNKNOWN, "Device exists, but no data received")
             else:
                 tm = time.time()
                 dt = tm - tm_t
                 if value_t != value_s and dt > self.FOLLOW_REQUEST_ERROR:
-                    return {"status": nagios.Nagios.ERROR, "msg": "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)/60)}
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.ERROR, "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)/60))
                 elif value_t != value_s and dt > self.FOLLOW_REQUEST_WARNING:
-                    return {"status": nagios.Nagios.WARNING, "msg": "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s))}
-                return {"status": nagios.Nagios.OK, "msg": "Requested setpoint equal valve setpoint %.1f°C" % value_s}
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.WARNING, "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)/60))
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.OK, "Requested setpoint equal valve setpoint %.1f°C" % value_s)
         #
         # BATTERY
         #
-        elif key == "battery":
+        elif key == self.MONITORING_BATTERY:
             if self.battery is None:
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received or unknown monitoring"}
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.UNKNOWN, "Device exists, but no data received or unknown monitoring")
             elif self.battery <= self.BATTERY_LVL_ERROR:
-                return {"status": nagios.Nagios.ERROR, "msg": "Battery level critical low (%.1f%%)" % self.battery}
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.ERROR, "Battery level critical low (%.1f%%)" % self.battery)
             elif self.battery <= self.BATTERY_LVL_WARNING:
-                return {"status": nagios.Nagios.WARNING, "msg": "Battery level low (%.1f%%)" % self.battery}
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.WARNING, "Battery level low (%.1f%%)" % self.battery)
             else:
-                return {"status": nagios.Nagios.OK, "msg": "Battery okay (%.1f%%)" % self.battery}
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.OK, "Battery okay (%.1f%%)" % self.battery)
+
+    def __nagios_return__(self, monitoring_name, status, msg, force=False):
+        tm = time.time()
+        if monitoring_name not in self.__unknown_tm__:
+            self.__unknown_tm__[monitoring_name] = None
+        if status == nagios.Nagios.UNKNOWN and not force:
+            if self.__unknown_tm__[monitoring_name] is None:
+                self.__unknown_tm__[monitoring_name] = tm
+            dt = tm - self.__unknown_tm__[monitoring_name]
+            if dt >= self.LAST_MSG_ERROR:
+                status = nagios.Nagios.UNKNOWN
+            elif dt >= self.LAST_MSG_WARNING:
+                status = nagios.Nagios.WARNING
+            else:
+                status = nagios.Nagios.OK
+            msg += " - since %.1fh" % (dt / 3600)
+        else:
+            self.__unknown_tm__[monitoring_name] = None
+        return {"status": status, "msg": msg}
 
 
 class group(object):