diff --git a/README.rst b/README.rst index 2854913..4160364 100644 --- a/README.rst +++ b/README.rst @@ -202,7 +202,11 @@ Changes ******* - Parse metrics from Nagios plugin output by default. If thresholds are - specified and metric parsing is disabled, an error is generated. + specified and metric parsing is disabled, an error is generated + (https://github.com/zc/cimaa/issues/12). + +- Add 'since' field to DynamoDB data store + (https://github.com/zc/cimaa/issues/18). 0.5.3 (2015-03-10) ================== diff --git a/src/zc/cimaa/dynamodb.py b/src/zc/cimaa/dynamodb.py index 8cffb95..9100feb 100644 --- a/src/zc/cimaa/dynamodb.py +++ b/src/zc/cimaa/dynamodb.py @@ -30,7 +30,7 @@ 'updated', data_type=boto.dynamodb2.types.NUMBER), ], - includes=['name', 'updated'], + includes=['name', 'since', 'updated'], ) ], ), @@ -39,6 +39,7 @@ class DB: def __init__(self, config, tables=tuple(schemas)): + # {agent: {name: fault_data}} self.last_faults = {} conn, prefix = connect(config) for name in schemas: @@ -56,7 +57,7 @@ def faults(): return [_fault_data(item) for item in self.faults.query_2(agent__eq=agent)] - self.last_faults[agent] = set(fault['name'] for fault in faults) + self.last_faults[agent] = {fault['name']: fault for fault in faults} return faults def set_faults(self, agent, faults): @@ -69,23 +70,30 @@ def set_faults(self, agent, faults): def write_faults(): self._set_faults(agent, faults, old_faults) - self.last_faults[agent] = set(fault['name'] for fault in faults) + self.last_faults[agent] = {fault['name']: fault for fault in faults} def _set_faults(self, agent, faults, old_faults): + now = int(time.time()) with self.faults.batch_write() as batch: - #print batch.__class__ # Heartbeat batch.put_item(dict( agent='_', name=agent, - updated=int(time.time()), + updated=now, )) for fault in faults: data = fault.copy() data['agent'] = agent + name = fault['name'] + if name in old_faults: + if 'since' not in old_faults[name]: + old_faults[name]['since'] = now + data['since'] = old_faults[name]['since'] + del old_faults[name] + else: + data['since'] = now batch.put_item(data, overwrite=True) - old_faults.discard(data['name']) for name in old_faults: batch.delete_item(agent=agent, name=name) @@ -171,7 +179,7 @@ def decorator(function): return decorator -def _convert_tstamp(data, name): +def _convert_timestamp(data, name): if name in data: try: data[name] = int(data[name]) @@ -184,7 +192,8 @@ def _fault_data(item): # dynamodb doesn't populate keys with empty strings if u'message' not in data: data[u'message'] = u'' - _convert_tstamp(data, u"updated") + _convert_timestamp(data, u"since") + _convert_timestamp(data, u"updated") if u'severity' in data: # Ints, not Decimals: data[u'severity'] = int(data[u'severity']) diff --git a/src/zc/cimaa/dynamodb.rst b/src/zc/cimaa/dynamodb.rst index cda7c7c..0e6726a 100644 --- a/src/zc/cimaa/dynamodb.rst +++ b/src/zc/cimaa/dynamodb.rst @@ -93,20 +93,23 @@ And perform some operations: {u'agent': u'agent', u'message': u'', u'name': u'blank', - u'severity': 50}, + u'severity': 50, + u'since': T}, {u'agent': u'agent', u'message': u'f2 is bad', u'name': u'f2', - u'severity': 40}, + u'severity': 40, + u'since': T}, {u'agent': u'agent', u'message': u'f3 is bad', u'name': u'f3', u'severity': 50, + u'since': T, u'triggered': u'y'}],... -Notice that the faults data includes data for an agent '_'. This is +Notice that the faults data includes data for an agent '_'. This is heartbeat data that tells us when the agent last ran. We can use this -to find old agents: +to find agents that no longer report: >>> db.old_agents(900) # agents that haven't run in 15 minutes [] @@ -118,15 +121,18 @@ to find old agents: [{u'agent': u'agent', u'message': u'', u'name': u'blank', - u'severity': 50}, + u'severity': 50, + u'since': T}, {u'agent': u'agent', u'message': u'f2 is bad', u'name': u'f2', - u'severity': 40}, + u'severity': 40, + u'since': T}, {u'agent': u'agent', u'message': u'f3 is bad', u'name': u'f3', u'severity': 50, + u'since': T, u'triggered': u'y'}] >>> db.set_faults('agent', []) @@ -149,7 +155,8 @@ has to ensure that it gets restored to avoid KeyErrors:: [{u'agent': u'agent', u'message': u'', u'name': u'blank', - u'severity': 50}] + u'severity': 50, + u'since': T}] The remove_agent method is used to remove an agent from the database completely; both faults and the heartbeat record are removed, while @@ -169,11 +176,13 @@ records for other agents are not touched: {u'agent': u'tnega', u'message': u'f1 is bad', u'name': u'f1', - u'severity': 40}, + u'severity': 40, + u'since': T}, {u'agent': u'tnega', u'message': u'f2 is bad', u'name': u'f2', - u'severity': 40}], + u'severity': 40, + u'since': T}], 'squelches': []} diff --git a/src/zc/cimaa/nagiosperf.py b/src/zc/cimaa/nagiosperf.py index 42d2f30..0c9f0bb 100644 --- a/src/zc/cimaa/nagiosperf.py +++ b/src/zc/cimaa/nagiosperf.py @@ -37,17 +37,28 @@ >>> ppp("| 'ha ha ha'=3has") ('\n', [{'name': "'ha ha ha'", 'units': 'has', 'value': 3.0}]) + >>> ppp("| 'ha ha ha'=-3has") + ('\n', [{'name': "'ha ha ha'", 'units': 'has', 'value': -3.0}]) + + >>> ppp("| 'ha ha ha'=+3has") + ('\n', [{'name': "'ha ha ha'", 'units': 'has', 'value': 3.0}]) + + >>> ppp( + ... "he he he; | m=2643MB;-5948;+5958;-6000;6000") + ('he he he; \n', + [{'name': 'm', 'units': 'MB', 'value': 2643.0}]) + See: https://nagios-plugins.org/doc/guidelines.html#AEN200 """ import re perf_parse = re.compile( - r"([^=' \t]+|'[^=']+')" # label - r"=" # = - r"(\d+(\.\d*)?|\.\d+)" # value - r"([^; \t]*)" # Units - r"(;(\d+(\.\d*)?|\.\d+)){0,4}" # warn, crit, min, max + r"([^=' \t]+|'[^=']+')" # label + r"=" # = + r"([-+]?\d+(\.\d*)?|[-+]?\.\d+)" # value + r"([^; \t]*)" # Units + r"(;([-+]?\d+(\.\d*)?|[-+]?\.\d+)){0,4}" # warn, crit, min, max ).findall def parse_output(text):