Browse Source

unknown state reduced to heartbeat monitoring timings and status

master
Dirk Alders 1 year ago
parent
commit
f6ab1dc4ce
1 changed files with 46 additions and 21 deletions
  1. 46
    21
      z_server/devices/__init__.py

+ 46
- 21
z_server/devices/__init__.py View File

12
 
12
 
13
 
13
 
14
 class base(object):
14
 class base(object):
15
+    MONITORING_HEARTBEAT = "heartbeat"
16
+    MONITORING_BATTERY = "battery"
17
+    MONITORING_FOLLOW_SETPOINT = "follow_setpoint"
18
+    #
15
     FOLLOW_REQUEST_WARNING = 5      # Seconds, till warning comes up, if device does not follow the command
19
     FOLLOW_REQUEST_WARNING = 5      # Seconds, till warning comes up, if device does not follow the command
16
     FOLLOW_REQUEST_ERROR = 60       # Seconds, till error comes up, if device does not follow the command
20
     FOLLOW_REQUEST_ERROR = 60       # Seconds, till error comes up, if device does not follow the command
17
     FOLLOW_KEY = None
21
     FOLLOW_KEY = None
19
     BATTERY_LVL_WARNING = 15
23
     BATTERY_LVL_WARNING = 15
20
     BATTERY_LVL_ERROR = 5
24
     BATTERY_LVL_ERROR = 5
21
     #
25
     #
22
-    LAST_MSG_WARNING = 6 * 24 * 60 * 60
23
-    LAST_MSG_ERROR = 24 * 24 * 60 * 60
26
+    LAST_MSG_WARNING = 6 * 60 * 60
27
+    LAST_MSG_ERROR = 24 * 60 * 60
24
 
28
 
25
     def __init__(self, mqtt_client: mqtt.mqtt_client, topic):
29
     def __init__(self, mqtt_client: mqtt.mqtt_client, topic):
26
         self.topic = topic
30
         self.topic = topic
27
         #
31
         #
32
+        self.__unknown_tm__ = {}
33
+        #
28
         mqtt_client.add_callback(topic, self.__rx__)
34
         mqtt_client.add_callback(topic, self.__rx__)
29
         mqtt_client.add_callback(topic + '/#', self.__rx__)
35
         mqtt_client.add_callback(topic + '/#', self.__rx__)
30
         #
36
         #
58
                 #
64
                 #
59
                 # battery level
65
                 # battery level
60
                 #
66
                 #
61
-                if "battery" in payload and message.topic == self.topic:
62
-                    self.battery = payload["battery"]
67
+                if self.MONITORING_BATTERY in payload and message.topic == self.topic:
68
+                    self.battery = payload[self.MONITORING_BATTERY]
63
 
69
 
64
     def target(self, key, value):
70
     def target(self, key, value):
65
         tm_t, value_t = self.__target_storage__.get(key, (0, None))
71
         tm_t, value_t = self.__target_storage__.get(key, (0, None))
75
         #
81
         #
76
         # HEARTBEAT
82
         # HEARTBEAT
77
         #
83
         #
78
-        if key == "heartbeat":
84
+        if key == self.MONITORING_HEARTBEAT:
79
             if self.last_device_msg is None:
85
             if self.last_device_msg is None:
80
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received or unknown monitoring"}
86
+                return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.UNKNOWN, "Device exists, but no data received")
81
             else:
87
             else:
82
                 dt = time.time() - self.last_device_msg
88
                 dt = time.time() - self.last_device_msg
83
                 dt_disp = dt / 60 / 60
89
                 dt_disp = dt / 60 / 60
84
                 if dt > self.LAST_MSG_ERROR:
90
                 if dt > self.LAST_MSG_ERROR:
85
-                    return {"status": nagios.Nagios.ERROR, "msg": "Last message %.1fh ago" % dt_disp}
91
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.ERROR, "Last message %.1fh ago" % dt_disp)
86
                 elif dt > self.LAST_MSG_WARNING:
92
                 elif dt > self.LAST_MSG_WARNING:
87
-                    return {"status": nagios.Nagios.WARNING, "msg": "Last message %.1fh ago" % dt_disp}
93
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.WARNING, "Last message %.1fh ago" % dt_disp)
88
                 else:
94
                 else:
89
-                    return {"status": nagios.Nagios.OK, "msg": "Last message %.1fh ago" % dt_disp}
95
+                    return self.__nagios_return__(self.MONITORING_HEARTBEAT, nagios.Nagios.OK, "Last message %.1fh ago" % dt_disp)
90
         #
96
         #
91
         # FOLLOW SETPOINT
97
         # FOLLOW SETPOINT
92
         #
98
         #
93
-        elif key == 'follow_setpoint':
99
+        elif key == self.MONITORING_FOLLOW_SETPOINT:
94
             if self.FOLLOW_KEY is None:
100
             if self.FOLLOW_KEY is None:
95
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exist, but does not follow any setpoint."}
101
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.UNKNOWN, "Device exist, but does not follow any setpoint.", force=True)
96
             tm_s, value_s = self.__state_storage__.get(self.FOLLOW_KEY, (0, None))
102
             tm_s, value_s = self.__state_storage__.get(self.FOLLOW_KEY, (0, None))
97
             try:
103
             try:
98
                 tm_t, value_t = self.__target_storage__[self.FOLLOW_KEY]
104
                 tm_t, value_t = self.__target_storage__[self.FOLLOW_KEY]
99
             except KeyError:
105
             except KeyError:
100
                 if value_s is not None:
106
                 if value_s is not None:
101
-                    return {"status": nagios.Nagios.WARNING, "msg": "Current temperature setpoint %.1f°C (age=%.1fmin), but never received a setpoint" % (value_s, (time.time()-tm_s)/60)}
102
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received"}
107
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.WARNING, "Current temperature setpoint %.1f°C (age=%.1fmin), but never received a setpoint" % (value_s, (time.time()-tm_s)/60))
108
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.UNKNOWN, "Device exists, but no data received")
103
             else:
109
             else:
104
                 tm = time.time()
110
                 tm = time.time()
105
                 dt = tm - tm_t
111
                 dt = tm - tm_t
106
                 if value_t != value_s and dt > self.FOLLOW_REQUEST_ERROR:
112
                 if value_t != value_s and dt > self.FOLLOW_REQUEST_ERROR:
107
-                    return {"status": nagios.Nagios.ERROR, "msg": "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)/60)}
113
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.ERROR, "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)/60))
108
                 elif value_t != value_s and dt > self.FOLLOW_REQUEST_WARNING:
114
                 elif value_t != value_s and dt > self.FOLLOW_REQUEST_WARNING:
109
-                    return {"status": nagios.Nagios.WARNING, "msg": "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s))}
110
-                return {"status": nagios.Nagios.OK, "msg": "Requested setpoint equal valve setpoint %.1f°C" % value_s}
115
+                    return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.WARNING, "Requested setpoint unequal valve setpoint %.1f°C since %.1fmin" % (value_s, (time.time()-tm_s)))
116
+                return self.__nagios_return__(self.MONITORING_FOLLOW_SETPOINT, nagios.Nagios.OK, "Requested setpoint equal valve setpoint %.1f°C" % value_s)
111
         #
117
         #
112
         # BATTERY
118
         # BATTERY
113
         #
119
         #
114
-        elif key == "battery":
120
+        elif key == self.MONITORING_BATTERY:
115
             if self.battery is None:
121
             if self.battery is None:
116
-                return {"status": nagios.Nagios.UNKNOWN, "msg": "Device exists, but no data received or unknown monitoring"}
122
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.UNKNOWN, "Device exists, but no data received or unknown monitoring")
117
             elif self.battery <= self.BATTERY_LVL_ERROR:
123
             elif self.battery <= self.BATTERY_LVL_ERROR:
118
-                return {"status": nagios.Nagios.ERROR, "msg": "Battery level critical low (%.1f%%)" % self.battery}
124
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.ERROR, "Battery level critical low (%.1f%%)" % self.battery)
119
             elif self.battery <= self.BATTERY_LVL_WARNING:
125
             elif self.battery <= self.BATTERY_LVL_WARNING:
120
-                return {"status": nagios.Nagios.WARNING, "msg": "Battery level low (%.1f%%)" % self.battery}
126
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.WARNING, "Battery level low (%.1f%%)" % self.battery)
121
             else:
127
             else:
122
-                return {"status": nagios.Nagios.OK, "msg": "Battery okay (%.1f%%)" % self.battery}
128
+                return self.__nagios_return__(self.MONITORING_BATTERY, nagios.Nagios.OK, "Battery okay (%.1f%%)" % self.battery)
129
+
130
+    def __nagios_return__(self, monitoring_name, status, msg, force=False):
131
+        tm = time.time()
132
+        if monitoring_name not in self.__unknown_tm__:
133
+            self.__unknown_tm__[monitoring_name] = None
134
+        if status == nagios.Nagios.UNKNOWN and not force:
135
+            if self.__unknown_tm__[monitoring_name] is None:
136
+                self.__unknown_tm__[monitoring_name] = tm
137
+            dt = tm - self.__unknown_tm__[monitoring_name]
138
+            if dt >= self.LAST_MSG_ERROR:
139
+                status = nagios.Nagios.UNKNOWN
140
+            elif dt >= self.LAST_MSG_WARNING:
141
+                status = nagios.Nagios.WARNING
142
+            else:
143
+                status = nagios.Nagios.OK
144
+            msg += " - since %.1fh" % (dt / 3600)
145
+        else:
146
+            self.__unknown_tm__[monitoring_name] = None
147
+        return {"status": status, "msg": msg}
123
 
148
 
124
 
149
 
125
 class group(object):
150
 class group(object):

Loading…
Cancel
Save