cold
0 AoM_Service/library/serviceapp/__init__.py (Executable file)
189 AoM_Service/library/serviceapp/alert.py (Executable file)
@@ -0,0 +1,189 @@
import itertools
from hashlib import md5

from thresholds import Thresholds


class Alert() :
    def __init__(self, alert_config, logger, tags, result, min_value, max_value, availability=False) :
        self.occurrences_breached = False
        self.new_level_breached = False
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.error = logger.error
        self.alert_config = alert_config
        self.thresholds = Thresholds(alert_config)
        self.tags = ""
        self.result = result
        # Default to the max value as the value we alert on
        self.value = max_value
        self.set_tags(tags)
        self.alert_config.init_for_tags(alert_config.get_tags())
        self.set_firing(min_value, max_value)
        if availability :
            self.info("Sending availability stat 1")
            self.send_metrics(self.name(), 0 if self.level() == "CRITICAL" else 1, self.result, 'service_level')

    def name(self) :
        return "Metric: {} for {}".format(self.alert_config.id, self.get_tags())

    def body(self) :
        body = ""
        if not self.get_firing() :
            body = self.get_not_firing_body()
        else :
            body = self.get_is_firing_body()
        self.debug("Alert {}->[{}]->{}, Occurrences={} of {}".format(
            self.name(),
            self.get_tags(),
            self.level(),
            self.get_occurrences(),
            self.alert_config.occurrences(),
        ))
        self.send_metrics(self.name(), self.level_code(), self.level())
        # TODO
        return body, md5(self.get_tags().encode('utf-8')).hexdigest()[:10]

    def level(self) :
        if not self.get_firing() :
            return "RECOVERY"
        if self.thresholds.get_breached(level=Thresholds.CRITICAL) :
            return "CRITICAL"
        if self.thresholds.get_breached(level=Thresholds.WARNING) :
            return "WARNING"

    def level_code(self) :
        # 0 = RECOVERY, 1 = WARNING, 2 = CRITICAL
        level = self.level()
        if level == "RECOVERY" :
            return 0
        elif level == "WARNING" :
            return 1
        elif level == "CRITICAL" :
            return 2

    def get_not_firing_body(self) :
        body = ""
        body += self.get_not_firing_body_threshold()
        body += self.get_not_firing_body_occurrences()
        if not body :
            self.alert_config.set_for_tags(self.get_tags()+"_count", 0)
            return ""
        return "GOOD: " + body

    def get_not_firing_body_threshold(self) :
        if self.result is None :
            return ""
        body = ""
        v, ok = self.alert_config.get_threshold(True, True)   # upper, warning
        if not ok :
            v, ok = self.alert_config.get_threshold(True, False)   # upper, critical
        if ok :
            body += self.form("<", v)
        v, ok = self.alert_config.get_threshold(False, True)   # lower, warning
        if not ok :
            v, ok = self.alert_config.get_threshold(False, False)   # lower, critical
        if ok :
            body += self.form(">", v)
        return body

    def get_not_firing_body_occurrences(self) :
        if not self.get_occurrences() :
            return ""
        body = ""
        if self.result is not None :
            self.send_metrics(self.name(), 1, self.level())
        else :
            body += "{} RECOVERY due to no results found from query. Recommend you manually validate recovery\n{}".format(self.name(), self.alert_config.url())
        self.set_occurrences(force=0)
        return body

    def get_is_firing_body(self) :
        body = ""
        if self.thresholds.get_breached(end=Thresholds.UPPER) :
            body += self.form(">", self.breached_threshold(Thresholds.UPPER))
        if self.thresholds.get_breached(end=Thresholds.LOWER) :
            body += self.form("<", self.breached_threshold(Thresholds.LOWER))
        if not self.occurrences_breached :
            self.debug("Value {} of {} for tag {} has occurred {} time(s) < threshold of {}".format(
                self.value,
                self.name(),
                self.get_tags(),
                self.get_occurrences(),
                self.alert_config.occurrences(),
            ))
            return ""
        return body

    def breached_threshold(self, end) :
        # Value of the first breached threshold on the given end
        for t in self.thresholds.get_thresholds_matching(end=end) :
            if t.get_breached() :
                return t.get_threshold()

    def form(self, operator, static) :
        return "{}\n{:.2f} {}= {}\n{}".format(
            self.name(),
            self.value,
            operator,
            static,
            self.alert_config.url(),
        )

    def set_tags(self, tags) :
        if tags :
            self.tags = tags
        elif self.result :
            # Each entry in result['tags'] is a list of values, so flatten first
            result_tags = [ self.result['tags'][x] for x in self.alert_config.get_tags() ]
            sorted_list = sorted(itertools.chain(*result_tags))
            self.tags = ", ".join(sorted_list)
        if not self.tags :
            self.tags = "instance"

    def get_tags(self) :
        return self.tags

    def set_firing(self, min_value, max_value) :
        self.thresholds = Thresholds(self.alert_config)
        self.thresholds.set_breached(min_value, max_value)
        self.set_occurrences()
        self.set_new_level_breached()
        self.send_metrics()
        self.send_threshold_metrics()

    def get_firing(self) :
        return self.thresholds.get_breached() and self.occurrences_breached

    def get_occurrences(self) :
        tags = self.get_tags()
        return self.alert_config.get_for_tags(tags)

    def set_occurrences(self, force=None) :
        previous_occurrences = self.get_occurrences()
        if self.thresholds.get_breached() :
            new_occurrences = previous_occurrences+1
            self.alert_config.set_for_tags(self.get_tags(), new_occurrences)
            self.occurrences_breached = self.alert_config.occurrences() <= new_occurrences
        if force is not None :
            self.alert_config.set_for_tags(self.get_tags(), force)
            self.alert_config.set_for_tags(self.get_tags()+"_count", force)

    def send_metrics(self, *args, **kwargs) :
        print("send_metrics not impl")

    def set_new_level_breached(self) :
        key = self.get_tags()
        level = self.level()
        previous_level = self.alert_config.get_level(key)
        self.new_level_breached = level != previous_level
        self.alert_config.set_level(key, level)
        self.info("testInfo: {} {}".format(
            "NEW" if self.new_level_breached else "EXISTING",
            self.level(),
        ))

    def get_new_level_breached(self) :
        return self.new_level_breached

    def send_threshold_metrics(self) :
        # TODO
        self.send_metrics(self.alert_config.id, self.value)
        for level in [Thresholds.WARNING, Thresholds.CRITICAL] :
            for end in [Thresholds.UPPER, Thresholds.LOWER] :
                v, ok = self.alert_config.get_threshold(end == Thresholds.UPPER, level == Thresholds.WARNING)
                if ok :
                    key = "{}_{}_threshold".format(
                        "upper" if end == Thresholds.UPPER else "lower",
                        "warning" if level == Thresholds.WARNING else "critical",
                    )
                    self.send_metrics(key, v, {'id': self.name()})
13 AoM_Service/library/serviceapp/alert_factory.py (Executable file)
@@ -0,0 +1,13 @@
from alert import Alert


class Alert_Factory() :
    def __init__(self, alert_config, logger) :
        self.alert_config = alert_config
        self.logger = logger
        self.info = logger.info
        self.warning = logger.warning
        self.debug = logger.debug
        self.error = logger.error

    def build(self, minvalue, maxvalue, result, tags, availability, alert_tags) :
        return Alert(self.alert_config, self.logger, tags, result, minvalue, maxvalue, availability)
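For context, a minimal sketch of the intended call flow, using config/logger stand-ins in the spirit of the mocks in test_alert.py; the tag string and values here are illustrative, not part of any real config:

from alert_factory import Alert_Factory

# alert_config and logger stand-ins as in test_alert.py's Mock_Alert_Config/Mock_Logger
factory = Alert_Factory(alert_config, logger)
# min/max come from a query result; an explicit tag string skips auto-detection
al = factory.build(0, 11, None, "tagsA, tagsB", False, None)
if al.get_firing():
    print(al.level())          # "WARNING" or "CRITICAL"
    body, suffix = al.body()   # message body plus an md5-based check-name suffix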
83 AoM_Service/library/serviceapp/prom_api.py (Executable file)
@@ -0,0 +1,83 @@
from datetime import datetime, timedelta
from urllib.parse import urljoin

import requests


class PromAPI:
    def __init__(self, endpoint='http://127.0.0.1:9090/'):
        """
        :param endpoint: base address of the Prometheus server
        """
        self.endpoint = endpoint

    @staticmethod
    def _to_timestamp(input_):
        """
        Convert the input to an RFC3339 timestamp string for Prometheus
        (without the trailing 'Z', which callers append)
        :param input_: a datetime, the string 'now', or a number
            (positive = absolute UNIX timestamp, 0 = now,
            negative = offset in seconds from now)
        :return: ISO-8601 timestamp string
        """
        if isinstance(input_, datetime):
            return input_.isoformat('T')
        if input_ == 'now':
            return datetime.utcnow().isoformat('T')
        if isinstance(input_, str):
            input_ = float(input_)
        if isinstance(input_, (int, float)):
            if input_ > 0:  # absolute UNIX timestamp
                return datetime.utcfromtimestamp(input_).isoformat('T')
            if input_ == 0:  # return now
                return datetime.utcnow().isoformat('T')
            # negative: offset in seconds relative to now
            return (datetime.utcnow() + timedelta(seconds=input_)).isoformat('T')

    def query(self, query='prometheus_build_info'):
        """Run an instant query against /api/v1/query"""
        return self._get(
            uri='/api/v1/query',
            params=dict(
                query=query
            )
        )

    def query_range(self, query='prometheus_build_info', start=-60, end='now', duration=60):
        """Run a range query against /api/v1/query_range"""
        params = {
            'query': query
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        if duration:
            params['step'] = duration
        return self._get(
            uri='/api/v1/query_range',
            params=params
        )

    def series(self, match='prometheus_build_info', start=-86400, end='now'):
        """List series matching the given selector via /api/v1/series"""
        params = {
            'match[]': match
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        return self._get(
            uri='/api/v1/series',
            params=params
        )

    def _get(self, uri, params, method='GET'):
        url = urljoin(self.endpoint, uri)
        assert method == 'GET'
        result = requests.get(
            url=url,
            params=params
        )
        return result.json()
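A quick usage sketch for PromAPI against a local Prometheus; the endpoint and metric name are illustrative:

from serviceapp.prom_api import PromAPI

api = PromAPI(endpoint='http://127.0.0.1:9090/')
# Instant query for the current value of a metric
print(api.query('up'))
# Range query over the last 5 minutes at a 60-second step;
# a negative start is treated as an offset in seconds from now
print(api.query_range(query='up', start=-300, end='now', duration=60))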
949 AoM_Service/library/serviceapp/service.py (Executable file)
@@ -0,0 +1,949 @@
""" Alert On Metrics functions"""

import copy
import itertools
import json
import os
import random
import smtplib
from email.mime.text import MIMEText
from socket import gaierror
from time import sleep
from hashlib import md5
import requests
from statsd import StatsClient
from serviceapp.prom_api import PromAPI

alert_status = [
    'RECOVERY',
    'WARNING',
    'WARNING',
    'CRITICAL',
    'CRITICAL',
    'CRITICAL']


def build_alert_message(alert, minvalue, maxvalue, result, logger,
                        availability, tag=None, alert_tags=None):
    """
    Build the alert message
    Args:
        alert: the alert object that includes a tag definition
        minvalue: the min value to test against the threshold
        maxvalue: the max value to test against the threshold
        result: the response back from kairosdb
        logger (log object): does the logging
        availability: Send availability stat 1
        tag: If passed in will use this value for the tag instead of
            getting it from the result object
        alert_tags: the tags corresponding to the result, used if an
            alert has to be triggered and a custom routing per tag is
            configured
    Returns:
        Alert message string
    """
    # DEFAULT TO MAX VALUE AS THE VALUE WE WILL ALERT ON. LOGIC BELOW
    # MAY CHANGE THIS.
    # value = maxvalue
    # # HANDLE THE CASE WHERE SOMEONE HAS NOT SPECIFIED ANY TAGS IN THEIR QUERY
    # # (USUALLY A GLOBAL ALL-DC QUERY)
    # if tag is None and result is not None:
    #     tag = ', '.join(sorted(list(itertools.chain(
    #         *[result['tags'][x] for x in alert['tags']]))))
    # tag_count = tag + "_count"
    # WE WILL USE THIS ONE LATER FOR TRACKING OCCURRENCES OF KAIROSDB NOT
    # RETURNING RESULTS
    # tag_noresult = tag + "_noresult"
    # if not tag:
    #     tag = 'instance'
    #     logger.debug("No tag specified for alert {}".format(alert['id']))
    # INSTEAD OF TRYING TO HANDLE LOGIC WHERE THESE ARE NOT IN THE OBJECT, PUT
    # THEM IN AS SOON AS THEY ARE CREATED SO THAT ON FIRST RUN AN ALERT HAS ALL
    # THE ALERT['alert_tags'][TAG] AND ALERT['alert_tags'][TAG_COUNT] NEEDED
    # if 'alert_tags' not in alert:
    #     alert['alert_tags'] = {}
    # if tag not in alert['alert_tags']:
    #     alert['alert_tags'][tag] = 0
    # if tag_count not in alert['alert_tags']:
    #     alert['alert_tags'][tag_count] = 0
    # IF WE HIT THIS FUNCTION THEN WE ALWAYS SET (OR RESET) THIS NORESULT
    # COUNTER TO 0 IE. IF WE ARE HERE IT IMPLIES WE HAVE A RESULT FROM
    # KAIROSDB OR WE ARE AT THE END OF A LONG PERIOD OF NORESULTS WHERE WE ARE
    # CLEARING EVERYTHING OUT ANYWAY
    # alert['alert_tags'][tag_noresult] = 0

    # # FIRST FIND OUT WHAT THRESHOLDS ARE SET AND HAVE BEEN BREACHED
    # upper_critical_threshold = None
    # upper_warning_threshold = None
    # lower_warning_threshold = None
    # lower_critical_threshold = None
    # upper_threshold = None
    # lower_threshold = None
    # is_warning_alarm = False
    # is_critical_alarm = False

    # # UPPER
    # upper_threshold_exists = False
    # upper_warning_threshold_breached = False
    # upper_critical_threshold_breached = False
    # if 'warning_upper_threshold' in alert:
    #     upper_threshold_exists = True
    #     upper_warning_threshold = alert['warning_upper_threshold']
    #     upper_threshold = upper_warning_threshold
    #     if maxvalue >= upper_warning_threshold:
    #         upper_warning_threshold_breached = True
    #         is_warning_alarm = True
    # if 'critical_upper_threshold' in alert:
    #     upper_critical_threshold = alert['critical_upper_threshold']
    #     if not upper_threshold_exists:
    #         upper_threshold = upper_critical_threshold
    #         upper_threshold_exists = True
    #     # IF CONFIG HAS A CRITICAL THRESHOLD SET AND WE PASS THAT THEN THAT IS
    #     # OUR THRESHOLD FOR ALERTING
    #     if maxvalue >= alert['critical_upper_threshold']:
    #         upper_threshold = upper_critical_threshold
    #         upper_critical_threshold_breached = True
    #         is_critical_alarm = True
    # upper_threshold_breached = (upper_warning_threshold_breached
    #                             or upper_critical_threshold_breached)

    # # LOWER
    # lower_threshold_exists = False
    # lower_warning_threshold_breached = False
    # lower_critical_threshold_breached = False
    # if 'warning_lower_threshold' in alert:
    #     lower_threshold_exists = True
    #     lower_warning_threshold = alert['warning_lower_threshold']
    #     lower_threshold = lower_warning_threshold
    #     if minvalue <= lower_warning_threshold:
    #         lower_warning_threshold_breached = True
    #         is_warning_alarm = True
    # if 'critical_lower_threshold' in alert:
    #     lower_critical_threshold = alert['critical_lower_threshold']
    #     if not lower_threshold_exists:
    #         lower_threshold = lower_critical_threshold
    #         lower_threshold_exists = True
    #     # IF CONFIG HAS A CRITICAL THRESHOLD SET AND WE PASS THAT THEN THAT IS
    #     # OUR THRESHOLD FOR ALERTING
    #     if minvalue <= lower_critical_threshold:
    #         lower_threshold = lower_critical_threshold
    #         lower_critical_threshold_breached = True
    #         is_critical_alarm = True
    # lower_threshold_breached = (lower_warning_threshold_breached or
    #                             lower_critical_threshold_breached)

    # # THIS HAS TO MEAN THERE IS A PROBLEM WITH THE ALERT CONFIG
    # if lower_threshold is None and upper_threshold is None:
    #     logger.debug(
    #         "ERROR: alert {} does not have any thresholds set on {}".format(
    #             alert['id'], tag))

    # # ON TO OCCURRENCES
    # if 'occurrences_threshold' in alert:
    #     occurrences_threshold = alert['occurrences_threshold']
    # else:
    #     occurrences_threshold = 1

    # alert_entity = "Metric: {} for {}".format(alert['id'], tag)

    # if 'url' not in alert:
    #     alert['url'] = os.environ['AOM_GRAFANA_URL'] + str(alert['id'])

    # ====================
    # PREPARE ALERT BODY STRING AND SET THE VALUE WE WILL USE TO ALERT WITH
    # ====================
    # alert_body = ''
    # if upper_threshold_breached:
    #     alert_body = "{}\n{:.2f} >= {}\n{}".format(
    #         alert_entity, value, upper_threshold, alert['url'])
    # if lower_threshold_breached:
    #     value = minvalue
    #     alert_body = "{}\n{:.2f} <= {}\n{}".format(
    #         alert_entity, value, lower_threshold, alert['url'])

    # SEND SOME STATS OUT AT THIS POINT AS WE KNOW WHERE WE ARE NOW. SEND THE
    # THRESHOLDS TOO SO THEY CAN BE GRAPHED
    ### BREEL TODO ###
    # if result is not None:
    #     send_metrics(alert, value, result)
    # if 'critical_upper_threshold' in alert:
    #     send_stat('upper_critical_threshold', upper_critical_threshold,
    #               {'id': alert['id']})
    # if 'warning_upper_threshold' in alert:
    #     send_stat('upper_warning_threshold', upper_warning_threshold,
    #               {'id': alert['id']})
    # if 'critical_lower_threshold' in alert:
    #     send_stat('lower_critical_threshold', lower_critical_threshold,
    #               {'id': alert['id']})
    # if 'warning_lower_threshold' in alert:
    #     send_stat('lower_warning_threshold', lower_warning_threshold,
    #               {'id': alert['id']})

    # ====================
    # APPLY OUR LOGIC TO MAKE SOME DECISIONS
    # ====================
    # current_alert_status = alert_status[0]
    # if not lower_threshold_breached and not upper_threshold_breached:
    #     # if result is not None:
    #     #     if lower_threshold_exists and not upper_threshold_exists:
    #     #         alert_body = "{}\n{:.2f} > {}\n{}".format(
    #     #             alert_entity, value, lower_threshold, alert['url'])
    #     #         logger.debug("GOOD: alert {} is higher than lower threshold {} "
    #     #                      "for value {} on tag {}".format(
    #     #                          alert['id'], lower_threshold, value, tag))
    #     #     if upper_threshold_exists and not lower_threshold_exists:
    #     #         alert_body = "{}\n{:.2f} < {}\n{}".format(
    #     #             alert_entity, value, upper_threshold, alert['url'])
    #     #         logger.debug("GOOD: alert {} is below the upper threshold {} "
    #     #                      "for value {} on tag {}".format(
    #     #                          alert['id'], upper_threshold, value, tag))
    #     #     if upper_threshold_exists and lower_threshold_exists:
    #     #         alert_body = "{}\n{} < {:.2f} < {}\n{}".format(
    #     #             alert_entity, lower_threshold, value, upper_threshold,
    #     #             alert['url'])
    #     #         logger.debug("GOOD: alert {} is between thresholds {} and {} "
    #     #                      "for value {} on tag {}".format(
    #     #                          alert['id'], upper_threshold, lower_threshold,
    #     #                          value, tag))
    #     # CHECK AND SEE IF TAG LOGIC IS SET, IE. WE WERE PREVIOUSLY IN ALARM
    #     # STATE
    #     # if alert['alert_tags'][tag] > 0:
    #     #     if result is not None:
    #     #         send_metrics(alert, 1, result, current_alert_status)
    #     #         logger.info(
    #     #             "TestInfo: RECOVERY: Clearing values for [{}] - {}".format(
    #     #                 alert['id'], tag))
    #     #     if result is None:
    #     #         alert_body = ("{} RECOVERY due to no results found from "
    #     #                       "KairosDB query. Recommend you manually validate "
    #     #                       "recovery.\n{}").format(
    #     #             alert_entity, alert['url'])
    #     #     alert['alert_tags'][tag] = 0
    #     #     alert['alert_tags'][tag_count] = 0
    #     #     if availability:
    #     #         logger.info("Sending availability stat 1")
    #     #         send_metrics(alert, 1, result, 'service_level')
    #     # else:
    #     #     # WE RETURN NONE IF NO ALERT (EITHER RECOVERY OR WARNING OR
    #     #     # CRITICAL) NEEDS TO BE FIRED
    #     #     alert['alert_tags'][tag_count] = 0
    #     #     if availability:
    #     #         logger.info("Sending availability stat 1")
    #     #         send_metrics(alert, 1, result, 'service_level')
    #     #     return None
    # else:
    ### BREEL WORKING HERE ###
    # ====================
    # SET KEY / VALUE FOR TAG ON ALERT
    # 0 == No Alert
    # 1 == Warning
    # 2 == Existing Warning Alert
    # 3 == New Critical
    # 4+ == Existing Critical Alert
    # ====================
    # CHECK IF TAG_COUNT HAS BEEN SET, IF NOT SET IT, IF SO INCREMENT IT
    # alert['alert_tags'][tag_count] += 1

    # ALERT WON'T FIRE UNLESS THE TAG_COUNT IS MORE THAN THE OCCURRENCES,
    # THAT BEING EITHER 1 OR WHATEVER WAS SET. ALERT HAS EXCEEDED
    # OCCURRENCES SO RETURN IT
    # TODO this doesn't belong in Alert.py
    # if alert['alert_tags'][tag_count] >= occurrences_threshold:
    #     # >= 4 MEANS THIS IS A KNOWN CRITICAL, SO NO-OP
    #     if alert['alert_tags'][tag] < 4:
    #         if is_warning_alarm and not is_critical_alarm:
    #             # THIS HANDLES GOING STRAIGHT FROM NORMAL TO WARNING LEVEL
    #             if alert['alert_tags'][tag] == 0:
    #                 # NEW WARNING
    #                 alert['alert_tags'][tag] = 1
    #                 logger.info("TestInfo: WARNING (NEW): {} - {}".format(
    #                     alert['id'], tag))
    #             else:
    #                 # EXISTING WARNING
    #                 alert['alert_tags'][tag] = 2
    #                 logger.info("TestInfo: WARNING (EXISTING): {} - {}".format(
    #                     alert['id'], tag))
    #         if is_critical_alarm:
    #             # THIS HANDLES GOING FROM WARNING LEVEL TO CRITICAL LEVEL
    #             if (alert['alert_tags'][tag] == 1 or
    #                     alert['alert_tags'][tag] == 2):
    #                 alert['alert_tags'][tag] = 3
    #                 logger.info("TestInfo: CRITICAL (WAS WARNING): {} - {}".format(
    #                     alert['id'], tag))
    #             else:
    #                 # THIS HANDLES GOING STRAIGHT FROM NORMAL TO CRITICAL
    #                 # LEVEL
    #                 if alert['alert_tags'][tag] < 3:
    #                     # NEW CRITICAL
    #                     alert['alert_tags'][tag] = 3
    #                     logger.info("TestInfo: CRITICAL (NEW): {} - {}".format(
    #                         alert['id'], tag))
    #                 else:
    #                     # EXISTING CRITICAL
    #                     alert['alert_tags'][tag] = 4
    #                     logger.info("TestInfo: CRITICAL (EXISTING): {} - {}".format(
    #                         alert['id'], tag))
    #     # RECORD THE FACT THAT SOMETHING IS STILL IN ALARM STATE IN METRICS
    #     # EVEN IF NOT ACTIVELY ALERTING ON IT
    #     # if is_critical_alarm:
    #     #     current_alert_status = alert_status[3]
    #     #     send_metrics(alert, 2, result, current_alert_status)
    #     #     if availability:
    #     #         logger.info("Sending availability stat 0")
    #     #         send_metrics(alert, 0, result, 'service_level')
    #     # if is_warning_alarm and not is_critical_alarm:
    #     #     current_alert_status = alert_status[1]
    #     #     send_metrics(alert, 1, result, current_alert_status)
    #     #     if availability:
    #     #         logger.info("Sending availability stat 1")
    #     #         send_metrics(alert, 1, result, 'service_level')
    #     logger.debug("{} alert for value {} of {} for tag {} has occurred "
    #                  "{} times. Threshold is >= {} times.".format(
    #                      current_alert_status,
    #                      value,
    #                      alert['id'],
    #                      tag,
    #                      alert['alert_tags'][tag_count],
    #                      occurrences_threshold))
    # else:
    #     # WE RETURN NONE IF NO ALERT (EITHER RECOVERY OR WARNING OR
    #     # CRITICAL) NEEDS TO BE FIRED
    #     logger.debug("Value {} of {} for tag {} has occurred {} time(s) < "
    #                  "threshold of {}".format(
    #                      value,
    #                      alert['id'],
    #                      tag,
    #                      alert['alert_tags'][tag_count],
    #                      occurrences_threshold))
    #     if availability:
    #         logger.info("Sending availability stat")
    #         send_metrics(alert, 1, result, 'service_level')
    #     return None

    # logger.debug(
    #     "Alert {}->[{}]->{}, Occurrences={}".format(
    #         alert['id'], tag, current_alert_status,
    #         alert['alert_tags'][tag_count]))
    # return alert_entity, alert_body, alert['alert_tags'][tag], alert_tags, md5(tag.encode('utf-8')).hexdigest()[:10]


def check_kairosdb_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
    Returns:
        None
    """
    availability = False
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due to Grafana limitations
    if 'availability' in alert_config and alert_config['availability']:
        availability = True
    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            # BUILD URL FOR KAIROSDB METRICS AND QUERY FOR RESULTS
            query_url = (service_config['kairosdb_url'] +
                         "api/v1/datapoints/query")
            ret = requests.post(
                query_url,
                data=json.dumps(
                    alert_config['query']),
                timeout=service_config['timeout'])
            assert ret.status_code == 200

            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret.json()['queries'][0]['results']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []

            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                alert_tags = (get_alert_tags(alert_config, r)
                              if has_custom_alert_routing(alert_config) else None)

                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    minvalue = min([x[1] for x in r['values']])
                    maxvalue = max([x[1] for x in r['values']])
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            minvalue,
                            maxvalue,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))

                # THIS MEANS OUR KAIROS QUERY RETURNED NOTHING. COULD BE NETWORK
                # ISSUES. WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF KAIROS NOT RETURNING DATA WE WILL CLEAR
                # AOM'S BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A
                # LATER OCCURRENCE CAUSING A PREMATURE ALERT.
                # A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                            "for {}, clear out counts for tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                            "for {}, tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))

            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert for: {}".format(
                            alert_config.get('id')))

        # HANDLE THE UNEXPECTED
        except TimeoutError:
            logger.error("Query [{}] took too long to run".format(
                alert_config['id']))
        except AssertionError:
            logger.error(
                "KairosDB query failed: {}\n"
                "HTTP status code:\t{}\n"
                "Error Message:\t{}\nQuery:\n"
                "{}".format(
                    ret.url,
                    ret.status_code,
                    ret.text,
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            sleep(alert_config['interval'])


def check_prometheus_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
    Returns:
        None
    """
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due to Grafana limitations
    availability = bool(alert_config.get('availability'))

    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            prom_api = PromAPI(endpoint=alert_config['prometheus_url'])
            ret = prom_api.query_range(
                query=alert_config['query'],
                start=alert_config['start_time'],
                end=alert_config['end_time'],
                duration=alert_config['interval'])

            assert ret['status'] == 'success'

            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret['data']['result']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []

            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                alert_tags = (get_alert_tags(alert_config, r) if
                              has_custom_alert_routing(alert_config) else None)

                # REARRANGE RESULT TO MORE CLOSELY MATCH KAIROSDB RESULT
                r['tags'] = {key: [value]
                             for (key, value) in r['metric'].items()}

                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    raw_values = [value for _, value in r['values']]
                    min_value = float(min(raw_values))
                    max_value = float(max(raw_values))
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            min_value,
                            max_value,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))

                # THIS MEANS OUR QUERY RETURNED NOTHING. COULD BE NETWORK ISSUES.
                # WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF NOT RETURNING DATA WE WILL CLEAR AOM'S
                # BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A LATER
                # OCCURRENCE CAUSING A PREMATURE ALERT. A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                            "for {}, clear out counts for tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                            "for {}, tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))

            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert {}".format(
                            alert_config.get('id')))

        # HANDLE THE UNEXPECTED
        except TimeoutError:
            logger.error(
                "Query [{}] took too long to run".format(
                    alert_config['id']))
        except AssertionError:
            logger.error(
                "Prometheus query failed:\n"
                "Status:\t{}\n"
                "Error Type:\t{}\n"
                "Error Message:\t{}\n"
                "Query:\n{}".format(
                    ret['status'],
                    ret['errorType'],
                    ret['error'],
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            sleep(alert_config['interval'])


# LOG ALERT RESULTS SO WE CAN DEBUG IF NEEDED
def log_alert_results(results, alert_config, logger):
    """
    Logs the results, broken out by tag provided in the alert_config, to the
    logger for debugging
    Args:
        results: the results portion of the response returned from the
            KairosDB (or Prometheus) query
        alert_config: config object of the alert
        logger (log object): does the logging
    Returns:
        None, logs to logger
    """

    for v in results:
        logger.debug("{} - Result: {}".format(alert_config['id'], v))


def send_alerts(
        alert,
        alert_config,
        victorops_url,
        slack_url,
        slack_token,
        smtp_server,
        sensu_endpoint,
        uchiwa_url,
        logger):
    """
    Sends out the alerts to VO, Email, and/or Slack
    Args:
        alert: the alert tuple:
            alert[0] == subject
            alert[1] == body
            alert[2] == status code
            alert[3] == alert_tags
            alert[4] == md5sum
        alert_config: the alert configuration object
        victorops_url: url to victorops
        slack_url: url to slack api calls
        slack_token: the token for the alert
        smtp_server: The server to send mail messages to
        sensu_endpoint:
        uchiwa_url:
        logger (log object): does the logging
    Returns: None
    """
    # GOING TO USE THIS FOR TAGGING SOME METRICS ABOUT WHAT ALERT CHANNEL WAS
    # USED
    tag_dict = dict()
    tag_dict['alert'] = alert_config['id']

    is_custom_alert_routing = has_custom_alert_routing(alert_config)
    if is_custom_alert_routing:
        alert_routing = alert_config.get('alert_routing_lookup', {})
        alert_config['alerts'] = alert_routing.get(
            alert[3], alert_config['alerts']['lookup']['default'])

    # once we move all alerts into Sensu, we don't need to do this
    if 'filters' in alert_config:
        logger.info(
            "alert_status : {}, alert_config: {}".format(
                alert[2], alert_config))
        if 'slack_subdue' in alert_config['filters'] and alert[2] in (
                1, 2) and alert_config['filters']['slack_subdue']:
            # unless the alert is critical we don't send it
            logger.info("Removed slack, alert_config: {}".format(alert_config))
            alert_config['alerts'].pop('slack', None)
        if ('victorops_subdue' in alert_config['filters'] and
                alert[2] in (1, 2) and
                alert_config['filters']['victorops_subdue']):
            # unless the alert is critical we don't send it
            alert_config['alerts'].pop('vo', None)
            logger.info("Removed vo, alert_config: {}".format(alert_config))

    # ====================
    # VICTOROPS HANDLING
    # ====================
    if 'vo' in alert_config['alerts']:
        for notify in alert_config['alerts']['vo']:
            payload = dict(entity_id=alert[0],
                           message_type=alert_status[alert[2]],
                           state_message=alert[1])
            r = None
            try:
                r = requests.post(
                    victorops_url + notify,
                    data=json.dumps(payload),
                    headers={
                        "Content-type": "application/json"})
                assert r.status_code == 200
                # Record a VO alert sent event
                tag_dict['alert_channel_type'] = "VictorOps"
                tag_dict['who'] = "vo:{}".format(notify)
                send_stat("alert_channel", 1, tag_dict)
                # logger.info("TestInfo: {} alert for {}".format(alert_status[alert[2]], alert[0]))
            except AssertionError:
                logger.error(
                    "Post to VO failed for {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                logger.error("Unhandled exception for alert_id:{} "
                             "when posting to VO: {}".format(
                                 alert_config['id'], str(e)))

    # ====================
    # EMAIL HANDLING
    # ====================
    if 'email' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        msg = MIMEText(alert[1])
        msg['Subject'] = '{} Status: {}'.format(
            alert[0], alert_status[alert[2]])
        msg['From'] = 'aom@qualtrics.com'
        msg['To'] = ','.join(
            [x + "@qualtrics.com" for x in alert_config['alerts']['email']])
        try:
            s = smtplib.SMTP(smtp_server)
            s.send_message(msg)
            s.quit()
            # Record an Email alert sent event
            tag_dict['alert_channel_type'] = "Email"
            tag_dict['who'] = "email:{}".format(msg['To'])
            send_stat("alert_channel", 1, tag_dict)
            # logger.info("TestInfo: {} alert for {}".format(alert_status[alert[2]], alert[0]))
        except Exception as e:
            logger.error(
                "Unhandled exception when sending mail for {} to {}\n{}".format(
                    alert_config['id'], smtp_server, str(e)))

    # ====================
    # SENSU HANDLING
    # ====================
    if 'sensu' in alert_config['alerts']:
        # Dictionary with static values for Sensu
        sensu_dict = {
            'source': 'AOM',
            'refresh': 3600,
            'occurrences': 1,
            'name': alert_config['id']+'__'+alert[4]}
        # if alert[3]:
        #     logger.info(alert)
        #     sensu_dict['name'] = '_'.join(
        #         [alert_config['id']] + sorted(list(alert[3])))
        if 'refresh' in alert_config:
            sensu_dict['refresh'] = alert_config['refresh']
        sensu_dict['interval'] = alert_config['interval']
        sensu_dict['handlers'] = []
        sensu_dict['dashboard'] = alert_config['url']
        if 'dependencies' in alert_config['alerts']['sensu'].keys():
            sensu_dict['dependencies'] = (alert_config['alerts']
                                          ['sensu']['dependencies'])
        if 'victorops' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("victorops")
            sensu_dict['routing_key'] = (alert_config['alerts']
                                         ['sensu']['victorops'])
        # # Leave this here until we have email support in Sensu
        # if 'email' in alert_config['alerts']['sensu'].keys():
        #     sensu_dict['handlers'].append("email")
        #     # verify this option
        #     sensu_dict['email'] = alert_config['alerts']['sensu']['email']
        if 'slack' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("slack")
            sensu_dict['slack_channel'] = (
                alert_config['alerts']['sensu']['slack'])
            # Format alert message
            sensu_dict['dashboard'] = (
                "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                    alert_config['url'], uchiwa_url, alert_config['id']))
        if 'jira' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("jira")
            sensu_dict.update(alert_config['alerts']['sensu']['jira'])
        if 'filters' in alert_config:
            sensu_dict['filters'] = alert_config['filters']
        # 0 = OK, 1 = WARNING, 2 = CRITICAL
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict['status'] = sensu_status[alert[2]]
        sensu_dict['output'] = alert[1]

        r = None
        try:
            user = os.environ['API_USER']
            passwd = os.environ['API_PASS']
            r = requests.post(
                sensu_endpoint,
                json.dumps(sensu_dict),
                auth=(
                    user,
                    passwd))
            assert r.status_code == 202
        except AssertionError:
            logger.error(
                "Post to Sensu failed {}\n{}:\t{}".format(
                    alert_config['id'],
                    r.status_code,
                    r.text))
        except Exception as e:
            logger.error("Unhandled exception for alert_id:{} "
                         "when posting to Sensu: {}".format(
                             alert_config['id'], str(e)))

    # ====================
    # SLACK HANDLING - all Slack alerts will go through Sensu
    # ====================
    if 'slack' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        refresh = alert_config.get('refresh', 3600)
        dashboard = alert_config.get('url', '')
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict2 = {'handlers': ['slack'],
                       'interval': alert_config['interval'],
                       'source': 'AOM',
                       'refresh': refresh,
                       'occurrences': 1,
                       'name': alert_config['id']+'__'+alert[4],
                       'dashboard': dashboard,
                       'status': sensu_status[alert[2]],
                       'output': alert[1]}
        if is_custom_alert_routing:
            sensu_dict2['name'] = '_'.join(
                [alert_config['id']] + list(alert[3]))
        sensu_dict2['dashboard'] = (
            "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                alert_config['url'], uchiwa_url, alert_config['id']))
        for channel in alert_config['alerts']['slack']:
            sensu_dict2['slack_channel'] = channel
            r = None
            try:
                user = os.environ['API_USER']
                passwd = os.environ['API_PASS']
                r = requests.post(
                    sensu_endpoint,
                    json.dumps(sensu_dict2),
                    auth=(
                        user,
                        passwd))
                assert r.status_code == 202
            except AssertionError:
                logger.error(
                    "Post to Sensu failed {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                logger.error("Unhandled exception for alert_id:{} when posting "
                             "to Sensu: {}".format(alert_config['id'], str(e)))

            # payload = dict(token=slack_token, channel=channel,
            #                text="{} Status: {}".format(alert[1], alert_status[alert[2]]))
            # r = None
            # try:
            #     r = requests.post(slack_url, data=payload)
            #     assert r.status_code == 200
            #     # Record a Slack alert sent event
            #     tag_dict['alert_channel_type'] = "Slack"
            #     tag_dict['who'] = "slack:{}".format(channel)
            #     send_stat("alert_channel", 1, tag_dict)
            #     # logger.info("TestInfo: {} alert for {}".format(alert_status[alert[2]], alert[0]))
            # except AssertionError:
            #     logger.error("Post to Slack failed for {}\n{}:\t{}".format(alert_config['id'], r.status_code, r.text))
            # except Exception as e:
            #     logger.error("Unhandled exception for alert_id:{} when posting to Slack: {}".format(alert_config['id'],
            #                  str(e)))


def send_metrics(alert, value, result, gaugename='stats'):
    """
    Sends the results from the alert check to statsd
    Args:
        alert: The Alert config object that holds the alert['tag'] value.
        gaugename: The name of the gauge metric we send.
        value: The value we want to send as a gauge.
        result: The result object from making the call. Use the data in this
            object to tag the metric.
    Returns: None
    """
    # GROUP ALL THE ALERTS TOGETHER SO THAT PEEPS CAN FILTER OUT BY TAG THEIR
    # SPECIFIC ALERTS
    result_tags = list(itertools.chain(
        *[result['tags'][x] for x in alert['tags']]))
    tag_dict = dict()
    for x in range(len(alert['tags'])):
        tag_dict[alert['tags'][x]] = result_tags[x]
    tag_dict['alert'] = alert['id']

    # SEND THE METRIC
    send_stat(gaugename, value, tag_dict)


def send_stat(gaugename, value, tag_dict, statprefix='aom'):
    """Sends stats value to statsd"""
    client = StatsClient('telegraf', 8125, statprefix)

    # SUBMIT STATS
    client.gauge(gaugename, value, tags=tag_dict)


def has_custom_alert_routing(alert_config):
    """Checks if alert has custom routing"""
    return 'lookup' in alert_config['alerts']


def get_alert_tags(alert_config, query_result):
    """Retrieves custom tags from alert"""
    query_tags = []
    for tag in alert_config['alerts']['lookup']['tags']:
        if (alert_config.get('query_type') == 'prometheus' and
                'metric' in query_result and
                tag in query_result['metric']):
            query_tags.append(query_result['metric'][tag])
        elif ('tags' in query_result and tag in query_result['tags']
                and query_result['tags'][tag]):
            query_tags.append(query_result['tags'][tag][0])
    return tuple(query_tags)
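To show how the pieces fit together, a hedged sketch of starting one check loop; the values below are illustrative, but the keys are the ones check_prometheus_alert() and send_alerts() actually read:

import logging

from serviceapp.service import check_prometheus_alert

alert_config = {
    'id': 'api_error_rate',                  # illustrative alert id
    'query': 'rate(http_errors_total[5m])',  # illustrative PromQL
    'prometheus_url': 'http://127.0.0.1:9090/',
    'start_time': -300,
    'end_time': 'now',
    'interval': 60,
    'tags': ['dc'],
    'alerts': {'slack': ['#oncall']},
}
service_config = {
    # only consulted by send_alerts() when production_mode=True
    'victorops_url': '', 'slack_url': '', 'slack_token': '',
    'smtp_server': '', 'sensu_endpoint': '', 'uchiwa_url': '',
    'timeout': 30,
}
logger = logging.getLogger('aom')
# Loops forever, sleeping alert_config['interval'] between runs
check_prometheus_alert(alert_config, service_config, logger, production_mode=False)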
123 AoM_Service/library/serviceapp/test_alert.py (Executable file)
@@ -0,0 +1,123 @@
import unittest


class Mock_Alert_Config() :
    def __init__(self) :
        self.cache = {}
        self.level = {}
        self.id = "id"

    def set_level(self, k, v) :
        self.level[k] = v

    def get_level(self, k) :
        if k not in self.level :
            return None
        return self.level[k]

    def init_for_tags(self, *args) :
        pass

    def occurrences(self) :
        return 1

    def get_threshold(self, upper, warning) :
        if warning :
            return None, False
        if upper :
            return 10, True
        else :
            return 0, True

    def url(self) :
        # Assumed stub: Alert.form() and the recovery body read the
        # dashboard URL from the config
        return "url"

    def get_tags(self) :
        return "tagsC, tagsD".split(", ")

    def set_for_tags(self, key, value) :
        if key not in self.cache :
            self.cache[key] = 0
        self.cache[key] = value

    def get_for_tags(self, key) :
        if key not in self.cache :
            self.cache[key] = 0
        return self.cache[key]


class Mock_Result() :
    def __init__(self) :
        pass

    def __getitem__(self, key) :
        if key == "tags" :
            return self
        else :
            return key


class Mock_Logger() :
    def __init__(self) :
        for k in ["error", "warn", "debug", "info", "warning"] :
            setattr(self, k, self.log)

    def log(self, *args) :
        pass


class Test_Alert(unittest.TestCase) :
    def test_set_tags(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()

        al = alert.Alert(ac, Mock_Logger(), None, None, -1, 11)
        self.assertEqual(al.get_tags(), "instance")

        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertEqual(al.get_tags(), "tagsA, tagsB")

        al.set_tags("a, b, c")
        self.assertEqual(al.get_tags(), "a, b, c")

        al.set_tags("a, b, c")
        self.assertEqual(al.get_tags(), "a, b, c")

    def test_firing(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 9)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 9)
        self.assertFalse(al.get_firing())

    def test_str(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        alert = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)

        self.assertEqual(alert.name(), "Metric: id for tagsA, tagsB")
        self.assertEqual(alert.body(), "")

    def test_occurrences(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
        self.assertEqual(False, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        self.assertEqual(False, al.occurrences_breached)
        self.assertEqual(0, ac.get_for_tags(al.get_tags()))

        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 11)
        self.assertEqual(True, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        self.assertEqual(True, al.occurrences_breached)
        self.assertEqual(4, ac.get_for_tags(al.get_tags()))


if __name__ == "__main__" :
    unittest.main()
33 AoM_Service/library/serviceapp/test_alert_factory.py (Executable file)
@@ -0,0 +1,33 @@
import unittest
import alert_factory


class Mock_Alert() :
    def __init__(self, *args) :
        self.args = args


class Mock_Logger() :
    def __init__(self) :
        self.info = self.log
        self.warn = self.log
        self.warning = self.log
        self.error = self.log
        self.debug = self.log

    def log(self, *args, **kwargs) :
        print(args, kwargs)


class Test_Alert_Factory(unittest.TestCase) :
    def setUp(self) :
        self.was = alert_factory.Alert
        alert_factory.Alert = Mock_Alert

    def tearDown(self) :
        alert_factory.Alert = self.was

    def test(self) :
        af = alert_factory.Alert_Factory(None, Mock_Logger())
        alert = af.build(0, 5, None, "tagA, tagB", False, "tagC, tagD")
        self.assertIsInstance(alert, Mock_Alert)


if __name__ == "__main__" :
    unittest.main()
8 AoM_Service/library/serviceapp/test_service.py (Executable file)
@@ -0,0 +1,8 @@
import unittest


class Test_Service(unittest.TestCase) :
    def test(self) :
        raise Exception("not impl")


if __name__ == "__main__" :
    unittest.main()
14 AoM_Service/library/serviceapp/test_threshold.py (Executable file)
@@ -0,0 +1,14 @@
import unittest


class Test_Threshold(unittest.TestCase) :
    def test(self) :
        import threshold
        tl = threshold.Threshold(5)

        self.assertFalse(tl.can_breach())

        self.assertFalse(tl.exceeds(7))
        self.assertFalse(tl.exceeds(3))


if __name__ == "__main__" :
    unittest.main()
14 AoM_Service/library/serviceapp/test_threshold_lower.py (Executable file)
@@ -0,0 +1,14 @@
import unittest


class Test_Threshold_Lower(unittest.TestCase) :
    def test(self) :
        import threshold_lower
        tl = threshold_lower.Threshold_Lower(5)

        self.assertTrue(tl.can_breach())

        self.assertTrue(tl.exceeds(3))
        self.assertFalse(tl.exceeds(7))


if __name__ == "__main__" :
    unittest.main()
14 AoM_Service/library/serviceapp/test_threshold_upper.py (Executable file)
@@ -0,0 +1,14 @@
import unittest


class Test_Threshold_Upper(unittest.TestCase) :
    def test(self) :
        import threshold_upper
        tl = threshold_upper.Threshold_Upper(5)

        self.assertTrue(tl.can_breach())

        self.assertTrue(tl.exceeds(7))
        self.assertFalse(tl.exceeds(3))


if __name__ == "__main__" :
    unittest.main()
157 AoM_Service/library/serviceapp/test_thresholds.py (Executable file)
@@ -0,0 +1,157 @@
import unittest


class Mock_Alert_Config() :
    def __init__(self) :
        self.upCrit = 10
        self.lowCrit = 1

    def get_threshold(self, upper, warn) :
        if upper and warn :
            return None, False
        elif upper and not warn :
            return self.upCrit, True
        elif not upper and warn :
            return None, False
        else:
            return self.lowCrit, True


class Test_Thresholds(unittest.TestCase) :
    def test_breached_both(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit-1, alert_config.upCrit+1)

        should_fire = [
            t.critical_breached(),
            t.lower_breached(),
            t.upper_breached(),

            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),
            t.end_breached(t.UPPER),

            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)

        should_not_fire = [
            t.warning_breached(),

            t.level_breached(t.WARNING),

            t.get_breached(level=t.WARNING),
        ]
        for i in range(len(should_not_fire)) :
            self.assertFalse(should_not_fire[i], i)

    def test_breached_lower(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit-1, alert_config.upCrit)

        should_fire = [
            t.critical_breached(),
            t.lower_breached(),

            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),

            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)

        should_not_fire = [
            t.warning_breached(),
            t.upper_breached(),

            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),

            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_not_fire)) :
            self.assertFalse(should_not_fire[i], i)

    def test_breached_upper(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit, alert_config.upCrit+1)

        should_fire = [
            t.critical_breached(),
            t.upper_breached(),

            t.level_breached(t.CRITICAL),
            t.end_breached(t.UPPER),

            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)

        for i in [
            t.warning_breached(),
            t.lower_breached(),

            t.level_breached(t.WARNING),
            t.end_breached(t.LOWER),

            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.LOWER),
        ] :
            self.assertFalse(i)

    def test_breached_notset(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)

        for i in [
            t.warning_breached(),
            t.critical_breached(),
            t.upper_breached(),
            t.lower_breached(),

            t.level_breached(t.CRITICAL),
            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),
            t.end_breached(t.LOWER),

            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
            t.get_breached(end=t.LOWER),
        ] :
            self.assertFalse(i)

    def test_get_matching(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        self.assertEqual(4, len([i for i in t.get_thresholds_matching()]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.CRITICAL)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.WARNING)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.UPPER)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.UPPER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.UPPER)]))


if __name__ == "__main__" :
    unittest.main()
19 AoM_Service/library/serviceapp/threshold.py (Executable file)
@@ -0,0 +1,19 @@
class Threshold() :
    def __init__(self, threshold) :
        self.threshold = threshold
        self.breached = False

    def can_breach(self) :
        return False

    def set_breached(self, value) :
        self.breached = self.exceeds(value)

    def get_breached(self) :
        return self.breached

    def exceeds(self, value) :
        return False

    def get_threshold(self) :
        return self.threshold
8 AoM_Service/library/serviceapp/threshold_lower.py (Executable file)
@@ -0,0 +1,8 @@
from threshold import Threshold


class Threshold_Lower(Threshold) :
    def exceeds(self, value) :
        return self.threshold > value

    def can_breach(self) :
        return True
8 AoM_Service/library/serviceapp/threshold_upper.py (Executable file)
@@ -0,0 +1,8 @@
from threshold import Threshold


class Threshold_Upper(Threshold) :
    def exceeds(self, value) :
        return self.threshold < value

    def can_breach(self) :
        return True
67 AoM_Service/library/serviceapp/thresholds.py (Executable file)
@@ -0,0 +1,67 @@
from threshold_upper import Threshold_Upper
from threshold_lower import Threshold_Lower
from threshold import Threshold


class Thresholds() :
    WARNING = True
    CRITICAL = False
    UPPER = True
    LOWER = False

    def __init__(self, alert_config) :
        self.alert_config = alert_config
        self.thresholds = {}
        for level in [ Thresholds.WARNING, Thresholds.CRITICAL ] :
            self.thresholds[level] = {}
            for end in [ Thresholds.UPPER, Thresholds.LOWER ] :
                constructor = Threshold_Upper
                if end == Thresholds.LOWER :
                    constructor = Threshold_Lower
                self.thresholds[level][end] = self.create_threshold(end, level, constructor)

    def create_threshold(self, isUpper, isWarning, constructor) :
        value, has = self.alert_config.get_threshold(isUpper, isWarning)
        if not has :
            constructor = Threshold
        return constructor(value)

    def warning_breached(self) :
        return self.level_breached(Thresholds.WARNING)

    def critical_breached(self) :
        return self.level_breached(Thresholds.CRITICAL)

    def upper_breached(self) :
        return self.end_breached(Thresholds.UPPER)

    def lower_breached(self) :
        return self.end_breached(Thresholds.LOWER)

    def level_breached(self, level) :
        return self.get_breached(level=level)

    def end_breached(self, end) :
        return self.get_breached(end=end)

    def can_breach(self) :
        can_breach = [t for t in self.get_thresholds_matching() if type(t) is not Threshold]
        return len(can_breach) > 0

    def get_breached(self, level=None, end=None) :
        for threshold in self.get_thresholds_matching(level=level, end=end) :
            if threshold.get_breached() :
                return True
        return False

    def set_breached(self, min_value, max_value) :
        for threshold in self.get_thresholds_matching(end=Thresholds.LOWER) :
            threshold.set_breached(min_value)
        for threshold in self.get_thresholds_matching(end=Thresholds.UPPER) :
            threshold.set_breached(max_value)

    def get_thresholds_matching(self, level=None, end=None) :
        for l in self.thresholds :
            if level is None or l == level :
                for e in self.thresholds[l] :
                    if end is None or e == end :
                        yield self.thresholds[l][e]
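Because the level and end selectors are plain booleans (WARNING and UPPER are True, CRITICAL and LOWER are False), a small sketch helps; the config below is an illustrative stand-in for the real alert config:

from thresholds import Thresholds

class Config:
    # Illustrative: critical thresholds only, no warning band
    def get_threshold(self, isUpper, isWarning):
        if isWarning:
            return None, False
        return (10, True) if isUpper else (1, True)

t = Thresholds(Config())
t.set_breached(0, 5)                              # 0 breaches the lower critical of 1
print(t.get_breached())                           # True
print(t.get_breached(level=Thresholds.CRITICAL))  # True
print(t.get_breached(end=Thresholds.UPPER))       # False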