# QVolution2019.2/AoM_Service/library/serviceapp/alert.py
#
# 190 lines
# 6.6 KiB
# Python
# Executable File

from hashlib import md5

from thresholds import Thresholds
class Alert(object):
    """Evaluate one alert configuration for one tag combination.

    On construction the alert records which thresholds are breached by the
    supplied min/max values, updates the per-tag occurrence counter kept on
    ``alert_config``, works out whether the alert level changed, and emits
    metrics. ``body()`` then renders the notification text.
    """

    # Numeric code reported next to each textual level.
    # NOTE(review): 0/1/2 follows the conventional OK/WARNING/CRITICAL
    # ordering; the original returned 0 for every level. Confirm with the
    # consumers of send_metrics.
    LEVEL_CODES = {"RECOVERY": 0, "WARNING": 1, "CRITICAL": 2}

    def __init__(self, alert_config, logger, tags, result, min_value,
                 max_value, availability=False):
        """Build the alert and immediately evaluate it.

        Args:
            alert_config: configuration/state object; must provide the
                methods used below (``get_tags``, ``init_for_tags``,
                ``get_for_tags``, ``set_for_tags``, ``occurrences``,
                ``get_threshold``, ``get_level``, ``set_level``, ``url``)
                and an ``id`` attribute.
            logger: object exposing ``info``/``debug``/``warning``/``error``.
            tags: pre-computed tag string, or a falsy value to derive the
                tags from ``result``.
            result: query result (or None when the query returned nothing).
            min_value: lowest observed value, checked against lower bounds.
            max_value: highest observed value, checked against upper bounds.
            availability: when true, also emit a ``service_level`` metric.
                The original code read an undefined name here; the flag is
                now an explicit, optional argument.
        """
        self.occurrences_breached = False
        self.new_level_breached = False
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.error = logger.error
        self.alert_config = alert_config
        self.thresholds = Thresholds(alert_config)
        self.tags = ""
        self.result = result
        self.min_value = min_value
        self.max_value = max_value
        # Refined by set_firing() once the breached side is known.
        self.value = max_value
        self.set_tags(tags)
        # NOTE(review): every other counter access is keyed by
        # self.get_tags() (a string), while this passes the config's tag
        # list. Left as in the original; confirm against AlertConfig.
        self.alert_config.init_for_tags(alert_config.get_tags())
        self.set_firing(min_value, max_value)
        if availability:
            self.info("Sending availability stat 1")
            self.send_metrics(
                self.name(),
                0 if self.level() == "CRITICAL" else 1,
                self.result,
                'service_level',
            )

    def name(self):
        """Return the display name: metric id plus the tag string."""
        return "Metric: {} for {}".format(self.alert_config.id, self.get_tags())

    def body(self):
        """Render the notification.

        Returns:
            A ``(text, key)`` tuple. ``text`` is empty when there is nothing
            to report; ``key`` is the first 10 hex digits of the MD5 of the
            tag string.
        """
        # Dispatch on the raw threshold state rather than get_firing(): a
        # breach that has not yet reached the required number of occurrences
        # must not be reported as a recovery (that would also reset its
        # counter, so it could never accumulate).
        if self.thresholds.get_breached():
            body = self.get_is_firing_body()
        else:
            body = self.get_not_firing_body()
        self.debug("Alert {}->[{}]->{}, Occurrences={} of {}".format(
            self.name(),
            self.get_tags(),
            self.level(),
            self.get_occurrences(),
            self.alert_config.occurrences(),
        ))
        self.send_metrics(self.name(), self.level_code(), self.level())
        # TODO (carried over from the original, which did not say what
        # remains to be done here)
        return body, md5(self.get_tags().encode('utf-8')).hexdigest()[:10]

    def level(self):
        """Return "RECOVERY", "CRITICAL", "WARNING", or None.

        None is returned when the alert is firing but no critical or
        warning threshold matches.
        """
        if not self.get_firing():
            return "RECOVERY"
        severities = (
            ("CRITICAL", Thresholds.CRITICAL),
            ("WARNING", Thresholds.WARNING),
        )
        for label, severity in severities:
            if list(self.thresholds.get_thresholds_matching(level=severity)):
                return label
        return None

    def level_code(self):
        """Return the numeric code for the current level (None if unknown)."""
        return self.LEVEL_CODES.get(self.level())

    def get_not_firing_body(self):
        """Build the recovery text, or "" when there is nothing to say."""
        body = (self.get_not_firing_body_threshold()
                + self.get_not_firing_body_occurrences())
        if not body:
            # NOTE(review): the original passed an undefined name here; 0 is
            # used because this path mirrors the counter reset performed by
            # set_occurrences(force=0).
            self.alert_config.set_for_tags(self.get_tags() + "_count", 0)
            return ""
        return "GOOD: " + body

    def get_not_firing_body_threshold(self):
        """Describe the value against the configured upper/lower bounds.

        Returns "" when there is no query result.
        """
        if self.result is None:
            return ""
        body = ""
        v, ok = self._lookup_threshold(is_upper=True)
        if ok:
            body += self.form("<", v, self.max_value)
        v, ok = self._lookup_threshold(is_upper=False)
        if ok:
            body += self.form(">", v, self.min_value)
        return body

    def get_not_firing_body_occurrences(self):
        """Handle recovery of an alert that had recorded occurrences.

        Resets the occurrence counters. Text is only produced when the
        query returned no result at all.
        """
        if not self.get_occurrences():
            return ""
        body = ""
        if self.result is not None:
            self.send_metrics(self.name(), 1, self.level())
        else:
            body += "{} RECOVERY due to no results found from query. Recommend you manually validate recovery\n{}".format(self.name(), self.alert_config.url())
        self.set_occurrences(force=0)
        return body

    def get_is_firing_body(self):
        """Build the text for a breached alert.

        Returns "" (after a debug log) while the breach has not yet been
        seen the configured number of times.
        """
        if not self.occurrences_breached:
            self.debug("Value {} of {} for tag {} has occurred {} time(s) < threshold of {}".format(
                self.value,
                self.name(),
                self.get_tags(),
                self.get_occurrences(),
                self.alert_config.occurrences(),
            ))
            return ""
        # NOTE(review): the original referenced an undefined
        # ``self.upper_firing`` for both sides. The threshold shown is now
        # looked up from the config, preferring the severity that matches
        # the current level.
        prefer_warning = self.level() != "CRITICAL"
        body = ""
        if self.thresholds.get_breached(level=Thresholds.UPPER):
            v, _ = self._lookup_threshold(True, prefer_warning)
            body += self.form(">", v, self.max_value)
        if self.thresholds.get_breached(level=Thresholds.LOWER):
            v, _ = self._lookup_threshold(False, prefer_warning)
            body += self.form("<", v, self.min_value)
        return body

    def _lookup_threshold(self, is_upper, prefer_warning=True):
        """Return ``(value, ok)`` for one side, trying one severity first."""
        v, ok = self.alert_config.get_threshold(
            isUpper=is_upper, isWarning=prefer_warning)
        if not ok:
            v, ok = self.alert_config.get_threshold(
                isUpper=is_upper, isWarning=not prefer_warning)
        return v, ok

    def form(self, operator, static, value=None):
        """Format one comparison line.

        Args:
            operator: "<" or ">"; "=" is appended in the output.
            static: the threshold being compared against.
            value: observed value to print; defaults to ``self.value``.
        """
        shown = self.value if value is None else value
        return "{}\n{:.2f} {}= {}\n{}".format(
            self.name(),
            shown,
            operator,
            static,
            self.alert_config.url(),
        )

    def set_tags(self, tags):
        """Set the tag string, deriving it from the result when not given.

        Falls back to "instance" when no tags can be determined.
        """
        if tags:
            self.tags = tags
        elif self.result:
            collected = []
            for tag_name in self.alert_config.get_tags():
                tag_value = self.result['tags'][tag_name]
                # Tag values may be a single string or a collection of
                # strings (presumably lists from the query backend — TODO
                # confirm); flatten collections so the join below works.
                if isinstance(tag_value, (list, tuple, set, frozenset)):
                    collected.extend(tag_value)
                else:
                    collected.append(tag_value)
            self.tags = ", ".join(sorted(collected))
        if not self.tags:
            self.tags = "instance"

    def get_tags(self):
        """Return the tag string identifying this alert instance."""
        return self.tags

    def set_firing(self, min_value, max_value):
        """Evaluate thresholds and update counters, level and metrics."""
        self.thresholds = Thresholds(self.alert_config)
        self.thresholds.set_breached(min_value, max_value)
        self.min_value = min_value
        self.max_value = max_value
        # NOTE(review): ``self.value`` was read but never assigned in the
        # original. Report the minimum only when just the lower bound is
        # breached, otherwise the maximum.
        lower_only = (
            self.thresholds.get_breached(level=Thresholds.LOWER)
            and not self.thresholds.get_breached(level=Thresholds.UPPER)
        )
        self.value = min_value if lower_only else max_value
        self.set_occurrences()
        self.set_new_level_breached()
        self.send_metrics()
        self.send_threshold_metrics()

    def get_firing(self):
        """True when a threshold is breached often enough to alert."""
        return self.thresholds.get_breached() and self.occurrences_breached

    def get_occurrences(self):
        """Return the stored occurrence count for this tag string."""
        tags = self.get_tags()
        return self.alert_config.get_for_tags(tags)

    def set_occurrences(self, force=None):
        """Advance the occurrence counter, or overwrite it with ``force``.

        Args:
            force: when not None (0 included), both the counter and its
                "_count" companion are set to this value.
        """
        previous_occurrences = self.get_occurrences()
        if self.thresholds.get_breached():
            new_occurrences = previous_occurrences + 1
            self.alert_config.set_for_tags(self.get_tags(), new_occurrences)
            self.occurrences_breached = (
                self.alert_config.occurrences() <= new_occurrences)
        # ``is not None`` rather than truthiness: callers pass force=0 to
        # reset, which a plain ``if force`` silently ignored.
        if force is not None:
            self.alert_config.set_for_tags(self.get_tags(), force)
            self.alert_config.set_for_tags(self.get_tags() + "_count", force)

    def send_metrics(self, *args, **kwargs):
        """Placeholder: metric emission is not implemented yet."""
        print("send_metrics not impl")

    def send_stat(self, *args, **kwargs):
        """Placeholder mirroring send_metrics.

        The original called this method without defining it.
        """
        print("send_stat not impl")

    def set_new_level_breached(self):
        """Record whether the level differs from the last stored level."""
        key = self.get_tags()
        level = self.level()
        previous_level = self.alert_config.get_level(key)
        self.new_level_breached = level != previous_level
        self.alert_config.set_level(key, level)
        self.info("testInfo: {} {}".format(
            "NEW" if self.new_level_breached else "EXISTING",
            self.level(),
        ))

    def get_new_level_breached(self):
        """Return True when the level changed during this evaluation."""
        return self.new_level_breached

    def send_threshold_metrics(self):
        """Emit the observed value and every configured threshold."""
        # TODO (carried over from the original, unspecified)
        self.send_metrics(self.alert_config.id, self.value)
        for level in (Thresholds.WARNING, Thresholds.CRITICAL):
            for end in (Thresholds.UPPER, Thresholds.LOWER):
                # ``end`` selects upper/lower and ``level`` selects
                # warning/critical; the original had the two swapped, so
                # both flags were always False.
                is_upper = end == Thresholds.UPPER
                is_warning = level == Thresholds.WARNING
                v, ok = self.alert_config.get_threshold(
                    isUpper=is_upper, isWarning=is_warning)
                if ok:
                    key = "{}_{}_threshold".format(
                        "upper" if is_upper else "lower",
                        "warning" if is_warning else "critical",
                    )
                    self.send_stat(key, v, {'id': self.name()})