This commit is contained in:
bel
2021-09-12 22:16:11 -06:00
commit ceeb6f0385
129 changed files with 9221 additions and 0 deletions

View File

View File

@@ -0,0 +1,189 @@
from thresholds import Thresholds
class Alert() :
    """Evaluate one query result against the configured thresholds and
    occurrence counters, and render the alert (or recovery) message body.

    NOTE: constructing an Alert has side effects -- set_firing() is called
    immediately, which updates occurrence counters on alert_config and
    emits metrics.
    """
    def __init__(self, alert_config, logger, tags, result, min_value, max_value, availability=False) :
        """
        :param alert_config: per-alert configuration object (thresholds,
            occurrence counters, per-tag state, id, url)
        :param logger: logger exposing info/debug/warning/error
        :param tags: explicit tag string; falsy to derive tags from result
        :param result: raw query result dict, or None if the query
            returned nothing
        :param min_value: minimum observed value to test against thresholds
        :param max_value: maximum observed value to test against thresholds
        :param availability: emit a service_level availability stat when
            True. BUG FIX: this was previously read from an undefined name.
        """
        self.occurrences_breached = False
        self.new_level_breached = False
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.error = logger.error
        self.alert_config = alert_config
        self.thresholds = Thresholds(alert_config)
        self.tags = ""
        self.result = result
        # BUG FIX: self.value was never initialised, but form(),
        # get_is_firing_body() and send_threshold_metrics() read it.
        # Default to the max value, matching the legacy
        # build_alert_message() behaviour ("DEFAULT TO MAX VALUE").
        # TODO confirm: legacy code switched to min_value on lower breaches.
        self.value = max_value
        self.set_tags(tags)
        self.alert_config.init_for_tags(alert_config.get_tags())
        self.set_firing(min_value, max_value)
        if availability :
            self.info("Sending availability stat 1")
            self.send_metrics(self.name(), 0 if self.level() == "CRITICAL" else 1, self.result, 'service_level')
    def name(self) :
        """Human-readable identity of this alert instance."""
        return "Metric: {} for {}".format(self.alert_config.id, self.get_tags())
    def body(self) :
        """Build the message body.

        Returns a tuple of (body string, first 10 hex chars of the md5 of
        the tag string).
        """
        from hashlib import md5  # local import; module header not visible here
        body = ""
        if not self.get_firing() :
            body = self.get_not_firing_body()
        else :
            body = self.get_is_firing_body()
        self.debug("Alert {}->[{}]->{}, Occurrences={} of {}".format(
            self.name(),
            self.get_tags(),
            self.level(),
            self.get_occurrences(),
            self.alert_config.occurrences(),
        ))
        self.send_metrics(self.name(), self.level_code(), self.level())
        # BUG FIX: 'tag' and 'md5' were undefined names here; hash this
        # instance's tag string instead.
        return body, md5(self.get_tags().encode('utf-8')).hexdigest()[:10]
    def level(self) :
        """Return "RECOVERY", "WARNING" or "CRITICAL".

        Returns None when firing but no threshold level matches, which
        should not normally happen.
        """
        if not self.get_firing() :
            return "RECOVERY"
        if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.CRITICAL)] :
            return "CRITICAL"
        if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.WARNING)] :
            return "WARNING"
    def level_code(self) :
        """Numeric code for the current level: 0=RECOVERY, 1=WARNING,
        2=CRITICAL. BUG FIX: previously every branch returned 0."""
        level = self.level()
        if level == "RECOVERY" :
            return 0
        elif level == "WARNING" :
            return 1
        elif level == "CRITICAL" :
            return 2
    def get_not_firing_body(self) :
        """Body for the non-firing case; "" when there is nothing to say."""
        body = ""
        # BUG FIX: these helpers were called without 'self.' (NameError).
        body += self.get_not_firing_body_threshold()
        body += self.get_not_firing_body_occurrences()
        if not body :
            # BUG FIX: 'force' was an undefined name here; reset the
            # per-tag count to 0 (matches the legacy tag_count reset).
            self.alert_config.set_for_tags(self.get_tags()+"_count", 0)
            return ""
        return "GOOD: " + body
    def get_not_firing_body_threshold(self) :
        """Describe which threshold the value is safely inside of.

        Prefers the warning threshold; falls back to critical. Returns ""
        when there is no result or no threshold configured.
        """
        if self.result is None :
            return ""
        body = ""
        v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=True)
        if not ok :
            v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=False)
        if ok :
            body += self.form("<", v)
        v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=True)
        if not ok :
            v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=False)
        if ok :
            body += self.form(">", v)
        return body
    def get_not_firing_body_occurrences(self) :
        """Recovery handling: emit a recovery metric (or a manual-validate
        message when there is no result) and reset the occurrence count."""
        if not self.get_occurrences() :
            return ""
        body = ""
        if not self.result is None :
            self.send_metrics(self.name(), 1, self.level())
        else :
            body += "{} RECOVERY due to no results found from query. Recommend you manually validate recovery\n{}".format(self.name(), self.alert_config.url())
        self.set_occurrences(force=0)
        return body
    def get_is_firing_body(self) :
        """Body for the firing case; "" until the configured occurrence
        count has been reached."""
        body = ""
        if self.thresholds.get_breached(level=Thresholds.UPPER) :
            body += self.form(">", self.upper_firing)
        if self.thresholds.get_breached(level=Thresholds.LOWER) :
            # BUG FIX: copy-paste error used upper_firing for the lower
            # breach. TODO confirm: neither upper_firing nor lower_firing is
            # assigned anywhere in this class -- presumably set externally.
            body += self.form("<", self.lower_firing)
        if not self.occurrences_breached :
            # BUG FIX: condition was inverted -- the "occurred N time(s) <
            # threshold" debug message and the empty body belong to the
            # not-yet-breached case.
            self.debug("Value {} of {} for tag {} has occurred {} time(s) < threshold of {}".format(
                self.value,
                self.name(),
                self.get_tags(),
                self.get_occurrences(),
                self.alert_config.occurrences(),
            ))
            return ""
        return body
    def form(self, operator, static) :
        """Format one "value <op>= threshold" line with name and url."""
        return "{}\n{:.2f} {}= {}\n{}".format(
            self.name(),
            self.value,
            operator,
            static,
            self.alert_config.url(),
        )
    def set_tags(self, tags) :
        """Set the tag string: explicit tags win; otherwise flatten and
        sort the result's tag values; fall back to "instance"."""
        if tags :
            self.tags = tags
        elif self.result :
            import itertools
            result_tags = [ self.result['tags'][x] for x in self.alert_config.get_tags() ]
            # BUG FIX: each entry is itself a list of tag values, so they
            # must be flattened with chain(*...), not chained as whole
            # lists (the legacy code used itertools.chain(*[...])).
            chain = itertools.chain(*result_tags)
            sorted_list = sorted(list(chain))
            self.tags = ", ".join(sorted_list)
        if not self.tags :
            self.tags = "instance"
    def get_tags(self) :
        """Return the current tag string."""
        return self.tags
    def set_firing(self, min_value, max_value) :
        """(Re)evaluate thresholds against the values and update occurrence
        and level state, emitting metrics along the way."""
        self.thresholds = Thresholds(self.alert_config)
        self.thresholds.set_breached(min_value, max_value)
        self.set_occurrences()
        self.set_new_level_breached()
        self.send_metrics()
        self.send_threshold_metrics()
    def get_firing(self) :
        """True when a threshold is breached AND the occurrence count is met."""
        return self.thresholds.get_breached() and self.occurrences_breached
    def get_occurrences(self) :
        """Current per-tag occurrence count from the alert config."""
        tags = self.get_tags()
        return self.alert_config.get_for_tags(tags)
    def set_occurrences(self, force=None) :
        """Increment the per-tag occurrence counter when breached and flag
        occurrences_breached once the configured count is reached.

        :param force: when not None, overwrite both per-tag counters with
            this value (force=0 resets them).
        """
        previous_occurrences = self.get_occurrences()
        if self.thresholds.get_breached() :
            new_occurrences = previous_occurrences+1
            self.alert_config.set_for_tags(self.get_tags(), new_occurrences)
            self.occurrences_breached = self.alert_config.occurrences() <= new_occurrences
        # BUG FIX: 'if force:' ignored force=0, so recovery never reset
        # the counters; compare against None instead.
        if force is not None :
            self.alert_config.set_for_tags(self.get_tags(), force)
            self.alert_config.set_for_tags(self.get_tags()+"_count", force)
    def send_metrics(self, *args, **kwargs) :
        """Metric emission hook -- not implemented yet."""
        print("send_metrics not impl")
    def send_stat(self, *args, **kwargs) :
        """Stat emission hook -- not implemented yet.

        BUG FIX: send_threshold_metrics() called self.send_stat but no
        such method existed (AttributeError); stubbed like send_metrics.
        """
        print("send_stat not impl")
    def set_new_level_breached(self) :
        """Record whether the level changed since the last evaluation."""
        key = self.get_tags()
        level = self.level()
        previous_level = self.alert_config.get_level(key)
        self.new_level_breached = level != previous_level
        self.alert_config.set_level(key, level)
        self.info("testInfo: {} {}".format(
            "NEW" if self.new_level_breached else "EXISTING",
            self.level(),
        ))
    def get_new_level_breached(self) :
        """True when the level changed on the most recent evaluation."""
        return self.new_level_breached
    def send_threshold_metrics(self) :
        """Send the current value and every configured threshold as stats."""
        # TODO
        self.send_metrics(self.alert_config.id, self.value)
        for level in [Thresholds.WARNING, Thresholds.CRITICAL] :
            for end in [Thresholds.UPPER, Thresholds.LOWER] :
                # BUG FIX: 'level' and 'end' were swapped -- isUpper was
                # tested against warning/critical and the stat key always
                # said "lower".
                v, ok = self.alert_config.get_threshold(isUpper=end == Thresholds.UPPER, isWarning=level == Thresholds.WARNING)
                if ok :
                    key = "{}_{}_threshold".format(
                        "upper" if end == Thresholds.UPPER else "lower",
                        "warning" if level == Thresholds.WARNING else "critical",
                    )
                    self.send_stat(key, v, {'id':self.name()})

View File

@@ -0,0 +1,13 @@
from alert import Alert
class Alert_Factory() :
    """Builds Alert instances bound to a single alert_config and logger."""
    def __init__(self, alert_config, logger) :
        """
        :param alert_config: per-alert configuration object shared by all
            Alerts this factory builds
        :param logger: logger exposing info/warning/debug/error
        """
        self.alert_config = alert_config
        self.logger = logger
        self.info = logger.info
        self.warning = logger.warning
        self.debug = logger.debug
        self.error = logger.error
    def build(self, minvalue, maxvalue, result, tags, availability, alert_tags) :
        """Construct an Alert for one query result.

        BUG FIX: the logger was previously omitted from the Alert()
        call, shifting every positional argument by one (tags were passed
        as the logger, result as tags, and so on).
        NOTE(review): availability and alert_tags are accepted but not
        forwarded because Alert.__init__ does not take them -- confirm
        whether they should be wired through.
        """
        return Alert(self.alert_config, self.logger, tags, result, minvalue, maxvalue)

View File

@@ -0,0 +1,83 @@
from datetime import datetime, timedelta
from urllib.parse import urljoin
import requests
class PromAPI:
    """Minimal client for the Prometheus HTTP API (query, query_range, series)."""
    def __init__(self, endpoint='http://127.0.0.1:9090/'):
        """
        :param endpoint: base address of the Prometheus server
        """
        self.endpoint = endpoint
    @staticmethod
    def _to_timestamp(input_):
        """
        Convert input to an ISO-8601 'T'-separated UTC time string for
        Prometheus. Callers append 'Z' to the returned value.

        Accepted inputs:
          - datetime: formatted as-is (naive datetimes are treated as UTC,
            consistent with the utcnow() branches below)
          - 'now': current UTC time
          - numeric (or numeric string): > 0 is a UNIX timestamp,
            0 means now, < 0 is an offset in seconds from now

        BUG FIX: the datetime branch previously returned a float
        (.timestamp()) and the positive-number branch returned the number
        unchanged, so callers' `+ 'Z'` string concatenation raised
        TypeError. Every branch now returns a string.
        :param input_:
        :return: ISO-8601 time string (no trailing 'Z')
        """
        if isinstance(input_, datetime):
            return input_.isoformat('T')
        if input_ == 'now':
            return datetime.utcnow().isoformat('T')
        if isinstance(input_, str):
            input_ = float(input_)
        if isinstance(input_, (int, float)):
            if input_ > 0:
                # Positive numbers are UNIX timestamps.
                return datetime.utcfromtimestamp(input_).isoformat('T')
            if input_ == 0:  # return now
                return datetime.utcnow().isoformat('T')
            if input_ < 0:
                return (datetime.utcnow() + timedelta(seconds=input_)).isoformat('T')
    def query(self, query='prometheus_build_info'):
        """Run an instant query via /api/v1/query."""
        return self._get(
            uri='/api/v1/query',
            params=dict(
                query=query
            )
        )
    def query_range(self, query='prometheus_build_info', start=-60, end='now', duration=60):
        """Run a range query via /api/v1/query_range.

        :param start: passed to _to_timestamp (default: 60s ago)
        :param end: passed to _to_timestamp, or None to omit
        :param duration: step size in seconds
        """
        params = {
            'query': query
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        if duration:
            params['step'] = duration
        print(params)
        return self._get(
            uri='/api/v1/query_range',
            params=params
        )
    def series(self, match='prometheus_build_info', start=-86400, end='now'):
        """Get series matching *match* via /api/v1/series."""
        params = {
            'match[]': match
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        print(params)
        return self._get(
            uri='/api/v1/series',
            params=params
        )
    def _get(self, uri, params, method='GET'):
        """Issue the HTTP GET against the endpoint and return parsed JSON."""
        url = urljoin(self.endpoint, uri)
        assert method == 'GET'
        result = requests.get(
            url=url,
            params=params
        )
        return result.json()

View File

@@ -0,0 +1,949 @@
""" Alert On Metrics functions"""
import copy
import itertools
import json
import os
import random
import smtplib
from email.mime.text import MIMEText
from socket import gaierror
from time import sleep
from hashlib import md5
import requests
from statsd import StatsClient
from serviceapp.prom_api import PromAPI
# Status strings indexed by the per-tag alert state code (see the legacy
# key/value scheme documented in build_alert_message):
#   0 = no alert / recovery, 1 = new warning, 2 = existing warning,
#   3 = new critical, 4+ = existing critical.
alert_status = [
    'RECOVERY',
    'WARNING',
    'WARNING',
    'CRITICAL',
    'CRITICAL',
    'CRITICAL']
def build_alert_message(alert, minvalue, maxvalue, result, logger,
                        availability, tag=None, alert_tags=None):
    """
    Build the alert message
    Args:
        alert: the alert object that includes a tag definition
        minvalue: the min value to test against the threshold
        maxvalue: the max value to test against the threshold
        result: the response back from kairosdb
        logger (log object): does the logging
        availability: Send availability stat 1
        tag: If passed in will use this value for the tag instead of
        getting it from the result object
        alert_tags: the tags corresponding to the result, used if an
        alert has to be triggered and a custom routing per tag is configured
    Returns:
        Alert message string, currently always None (see note below)
    """
    # NOTE(review): the entire original implementation of this function was
    # left commented out in-place (mid-refactor, per the "BREEL WORKING
    # HERE" markers), so the function body executed nothing and implicitly
    # returned None. The dead commented-out code has been removed; callers
    # already tolerate the None return (they filter None entries out of
    # alert_list before dispatching).
    # TODO: delegate to the Alert class / Alert_Factory once that refactor
    # is complete, returning
    # (alert_entity, alert_body, state_code, alert_tags, tag_md5) as the
    # downstream send_alerts() expects.
    return None
def check_kairosdb_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Run one alert's KairosDB check loop: query, evaluate, dispatch.

    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, log the alert instead of sending
    Returns:
        None (this function loops forever, sleeping alert_config['interval']
        seconds between iterations)
    """
    availability = False
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due Grafana limitations
    if 'availability' in alert_config and alert_config['availability']:
        availability = True
    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            # BUILD URL FOR KAIROSDB METRICS AND QUERY FOR RESULTS
            # NOTE(review): os.path.join is given a single pre-concatenated
            # argument, so kairosdb_url is presumably expected to end with
            # '/' -- confirm; urllib.parse.urljoin would be safer.
            query_url = os.path.join(
                service_config['kairosdb_url'] +
                "api/v1/datapoints/query")
            ret = requests.post(
                query_url,
                data=json.dumps(
                    alert_config['query']),
                timeout=service_config['timeout'])
            assert ret.status_code == 200
            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret.json()['queries'][0]['results']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []
            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                alert_tags = (get_alert_tags(alert_config, r)
                              if has_custom_alert_routing(alert_config) else None)
                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    minvalue = min([x[1] for x in r['values']])
                    maxvalue = max([x[1] for x in r['values']])
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            minvalue,
                            maxvalue,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))
                # THIS MEANS OUR KAIROS QUERY RETURNED NOTHING. COULD BE NETWORK
                # ISSUES. WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF KAIROS NOT RETURNING DATA WE WILL CLEAR
                # AOM'S BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A
                # LATER OCCURRENCE CAUSING A PREMATURE ALERT.
                # A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                            "for {}, clear out counts for tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                            "for {}, tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert for: {}".format(
                            alert_config.get('id')))
        # HANDLE THE UNEXPECTED
        except TimeoutError:
            # BUG FIX (message): "took to long" -> "took too long"
            logger.error("Query [{}] took too long to run".format(
                alert_config['id']))
        except AssertionError:
            # BUG FIX (message): "KairsoDB" -> "KairosDB"
            logger.error(
                "KairosDB query failed: {}\n"
                "HTTP status code:\t{}\n"
                "Error Message:\t{}\nQuery:\n"
                "{}".format(
                    ret.url,
                    ret.status_code,
                    ret.text,
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            sleep(alert_config['interval'])
def check_prometheus_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Run one alert's Prometheus check loop: query, evaluate, dispatch.

    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, log the alert instead of sending
    Returns:
        None (this function loops forever, sleeping alert_config['interval']
        seconds between iterations)
    """
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due to Grafana limitations
    availability = bool(alert_config.get('availability'))
    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            prom_api = PromAPI(endpoint=alert_config['prometheus_url'])
            ret = prom_api.query_range(
                query=alert_config['query'],
                start=alert_config['start_time'],
                end=alert_config['end_time'],
                duration=alert_config['interval'])
            assert ret['status'] == 'success'
            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret['data']['result']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []
            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                alert_tags = (get_alert_tags(alert_config, r) if
                              has_custom_alert_routing(alert_config) else None)
                # REARRANGE RESULT TO MORE CLOSELY MATCH KAIROSDB RESULT
                r['tags'] = {key: [value]
                             for (key, value) in r['metric'].items()}
                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    raw_values = [value for _, value in r['values']]
                    min_value = float(min(raw_values))
                    max_value = float(max(raw_values))
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            min_value,
                            max_value,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))
                # THIS MEANS OUR QUERY RETURNED NOTHING. COULD BE NETWORK ISSUES
                # WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF NOT RETURNING DATA WE WILL CLEAR AOM'S
                # BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A LATER
                # OCCURRENCE CAUSING A PREMATURE ALERT. A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                            "for {}, clear out counts for tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                            "for {}, tag '{}'".format(
                                                alert_config['alert_tags'][key_noresult],
                                                alert_config['id'], key))
            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert {}".format(
                            alert_config.get('id')))
        # HANDLE THE UNEXPECTED
        except TimeoutError:
            # BUG FIX (message): "took to long" -> "took too long"
            logger.error(
                "Query [{}] took too long to run".format(
                    alert_config['id']))
        except AssertionError:
            logger.error(
                "Prometheus query failed:\n"
                "Status:\t{}\n"
                "Error Type:\t{}\n"
                "Error Message:\t{}\n"
                "Query:\n{}".format(
                    ret['status'],
                    ret['errorType'],
                    ret['error'],
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            sleep(alert_config['interval'])
# LOG ALERT RESULTS SO WE CAN DEBUG IF NEEDED
def log_alert_results(results, alert_config, logger):
    """Emit one debug log line per raw query result for an alert.

    Args:
        results: list of result objects returned from the metrics backend
        alert_config: config object of the alert (its 'id' prefixes each line)
        logger (log object): does the logging
    Returns:
        None, logs to logger
    """
    alert_id = alert_config['id']
    for result in results:
        logger.debug("{} - Result: {}".format(alert_id, result))
def send_alerts(
        alert,
        alert_config,
        victorops_url,
        slack_url,
        slack_token,
        smtp_server,
        sensu_endpoint,
        uchiwa_url,
        logger):
    """
    Fans a single alert out to its configured channels: VictorOps,
    email, Sensu, and Slack (Slack is delivered through Sensu).

    Args:
        alert: the alert tuple:
            alert[0] == subject / entity id
            alert[1] == body text
            alert[2] == status code (index into the module-level
                alert_status map and the local sensu_status maps)
            alert[3] == alert_tags (key for custom routing lookup)
            alert[4] == md5sum suffix used in the Sensu check name
        alert_config: the alert configuration object. NOTE: mutated in
            place (routing lookup replaces 'alerts', subdue filters pop
            channels), which is why callers pass a deep copy.
        victorops_url: url to victorops
        slack_url: url to slack api calls (unused -- direct Slack path
            is commented out below; Slack goes through Sensu)
        slack_token: the token for the alert (unused, see slack_url)
        smtp_server: The server to send mail messages too
        sensu_endpoint: Sensu results API endpoint; authenticated with
            the API_USER / API_PASS environment variables
        uchiwa_url: base url of the Uchiwa dashboard linked from Sensu
            alert output
        logger (log object): does the logging
    Returns: None
    """
    # GOING TO USE THIS FOR TAGGING SOME METRICS ABOUT WHAT ALERT CHANNEL WAS
    # USED
    tag_dict = dict()
    tag_dict['alert'] = alert_config['id']
    is_custom_alert_routing = has_custom_alert_routing(alert_config)
    if is_custom_alert_routing:
        # Replace the generic channel config with the entry keyed by this
        # alert's tag tuple, falling back to the lookup default.
        alert_routing = alert_config.get('alert_routing_lookup', {})
        alert_config['alerts'] = alert_routing.get(
            alert[3], alert_config['alerts']['lookup']['default'])
    # once we move all alerts into sensu, we dont need to do this
    if 'filters' in alert_config:
        logger.info(
            "alert_status : {}, alert_config: {}".format(
                alert[2], alert_config))
        # Statuses 1 and 2 are treated as sub-critical here: the subdue
        # filters drop the corresponding channel for them.
        if 'slack_subdue' in alert_config['filters'] and alert[2] in (
                1, 2) and alert_config['filters']['slack_subdue']:
            # unless the alert is critical we dont send it
            logger.info("Removed slack, alert_config: {}".format(alert_config))
            alert_config['alerts'].pop('slack', None)
        if ('victorops_subdue' in alert_config['filters'] and
                alert[2] in (1, 2) and
                alert_config['filters']['victorops_subdue']):
            # unless the alert is critical we dont send it
            alert_config['alerts'].pop('vo', None)
            logger.info("Removed vo, alert_config: {}".format(alert_config))
    # ====================
    # VICTOROPS HANDLING
    # ====================
    if 'vo' in alert_config['alerts']:
        for notify in alert_config['alerts']['vo']:
            payload = dict(entity_id=alert[0],
                           message_type=alert_status[alert[2]],
                           state_message=alert[1])
            r = None
            try:
                # NOTE(review): "application-json" looks like a typo for
                # "application/json"; VO appears to tolerate it -- confirm
                # before changing.
                r = requests.post(
                    victorops_url + notify,
                    data=json.dumps(payload),
                    headers={
                        "Content-type": "application-json"})
                assert r.status_code == 200
                # Record a VO alert sent event
                tag_dict['alert_channel_type'] = "VictorOps"
                tag_dict['who'] = "vo:{}".format(notify)
                send_stat("alert_channel", 1, tag_dict)
                # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
            except AssertionError:
                logger.error(
                    "Post to VO failed for {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                logger.error("Unhandled exception for alert_id:{} "
                             "when posting to VO: {}".format(
                                 alert_config['id'], str(e)))
    # ====================
    # EMAIL HANDLING
    # ====================
    # Email only fires for statuses 0, 1 and 3.
    if 'email' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        msg = MIMEText(alert[1])
        msg['Subject'] = '{} Status: {}'.format(
            alert[0], alert_status[alert[2]])
        msg['From'] = 'aom@qualtrics.com'
        msg['To'] = ','.join(
            [x + "@qualtrics.com" for x in alert_config['alerts']['email']])
        try:
            s = smtplib.SMTP(smtp_server)
            s.send_message(msg)
            s.quit()
            # Record an Email alert sent event
            tag_dict['alert_channel_type'] = "Email"
            tag_dict['who'] = "email:{}".format(msg['To'])
            send_stat("alert_channel", 1, tag_dict)
            # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
        except Exception as e:
            logger.error(
                "Unhandled exception when sending mail for {} to {}\n{}".format(
                    alert_config['id'], smtp_server, str(e)))
    # ====================
    # SENSU HANDLING
    # ====================
    if 'sensu' in alert_config['alerts']:
        # Dictionary with static values for Sensu
        sensu_dict = {
            'source': 'AOM',
            'refresh': 3600,
            'occurrences': 1,
            'name': alert_config['id']+'__'+alert[4]}
        # if alert[3]:
        #     logger.info(alert)
        #     sensu_dict['name'] = '_'.join(
        #         [alert_config['id']] + sorted(list(alert[3])))
        if 'refresh' in alert_config:
            sensu_dict['refresh'] = alert_config['refresh']
        sensu_dict['interval'] = alert_config['interval']
        sensu_dict['handlers'] = []
        sensu_dict['dashboard'] = alert_config['url']
        if 'dependencies' in alert_config['alerts']['sensu'].keys():
            sensu_dict['dependencies'] = (alert_config['alerts']
                                          ['sensu']['dependencies'])
        if 'victorops' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("victorops")
            sensu_dict['routing_key'] = (alert_config['alerts']
                                         ['sensu']['victorops'])
        # # Leave this here until we have email support in Sensu
        # if 'email' in alert_config['alerts']['sensu'].keys():
        #     sensu_dict['handlers'].append("email")
        #     # verify this option
        #     sensu_dict['email'] = alert_config['alerts']['sensu']['email']
        if 'slack' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("slack")
            sensu_dict['slack_channel'] = (
                alert_config['alerts']['sensu']['slack'])
            # Format alert message (Slack-style <url|here> links)
            sensu_dict['dashboard'] = (
                "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                    alert_config['url'], uchiwa_url, alert_config['id']))
        if 'jira' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("jira")
            sensu_dict.update(alert_config['alerts']['sensu']['jira'])
        if 'filters' in alert_config:
            sensu_dict['filters'] = alert_config['filters']
        # 0 = OK, 1 = WARNING, 2 = CRITICAL
        # Collapses the six internal statuses onto Sensu's three.
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict['status'] = sensu_status[alert[2]]
        sensu_dict['output'] = alert[1]
        r = None
        try:
            user = os.environ['API_USER']
            passwd = os.environ['API_PASS']
            r = requests.post(
                sensu_endpoint,
                json.dumps(sensu_dict),
                auth=(
                    user,
                    passwd))
            assert r.status_code == 202
        except AssertionError:
            logger.error(
                "Post to Sensu failed {}\n{}:\t{}".format(
                    alert_config['id'],
                    r.status_code,
                    r.text))
        except Exception as e:
            logger.error("Unhandled exception for alert_id:{} "
                         "when posting to Sensu: {}".format(
                             alert_config['id'], str(e)))
    # ====================
    # SLACK HANDLING - all Slack alerts will go through Sensu
    # ====================
    # Like email, only statuses 0, 1 and 3 are forwarded.
    if 'slack' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        refresh = alert_config.get('refresh', 3600)
        dashboard = alert_config.get('url', '')
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict2 = {'handlers': ['slack'],
                       'interval': alert_config['interval'],
                       'source': 'AOM',
                       'refresh': refresh,
                       'occurrences': 1,
                       'name': alert_config['id']+'__'+alert[4],
                       'dashboard': dashboard,
                       'status': sensu_status[alert[2]],
                       'output': alert[1]}
        if is_custom_alert_routing:
            sensu_dict2['name'] = '_'.join(
                [alert_config['id']] + list(alert[3]))
            sensu_dict2['dashboard'] = (
                "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                    alert_config['url'], uchiwa_url, alert_config['id']))
        # One Sensu result is posted per configured Slack channel.
        for channel in alert_config['alerts']['slack']:
            sensu_dict2['slack_channel'] = channel
            r = None
            try:
                user = os.environ['API_USER']
                passwd = os.environ['API_PASS']
                r = requests.post(
                    sensu_endpoint,
                    json.dumps(sensu_dict2),
                    auth=(
                        user,
                        passwd))
                assert r.status_code == 202
            except AssertionError:
                logger.error(
                    "Post to Sensu failed {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                # NOTE(review): missing space -- these adjacent literals
                # concatenate to "postingto Sensu" in the logged message.
                logger.error("Unhandled exception for alert_id:{} when posting"
                             "to Sensu: {}".format(alert_config['id'], str(e)))
        # payload = dict(token=slack_token, channel=channel,
        #                text="{} Status: {}".format(alert[1], alert_status[alert[2]]))
        # r = None
        # try:
        #     r = requests.post(slack_url, data=payload)
        #     assert r.status_code == 200
        #     # Record an Slack alert sent event
        #     tag_dict['alert_channel_type'] = "Slack"
        #     tag_dict['who'] = "slack:{}".format(channel)
        #     send_stat("alert_channel", 1, tag_dict)
        #     # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
        # except AssertionError:
        #     logger.error("Post to Slack failed for {}\n{}:\t{}".format(alert_config['id'], r.status_code, r.text))
        # except Exception as e:
        #     logger.error("Unhandled exception for alert_id:{} when posting to Slack: {}".format(alert_config['id'],
        #                  str(e)))
def send_metrics(alert, value, result, gaugename='stats'):
    """
    Sends the results from the alert check to statsd.

    Args:
        alert: alert config dict; 'tags' lists the tag names to report
            and 'id' names the alert.
        value: the gauge value to submit.
        gaugename: the statsd gauge to write to.
        result: query result; result['tags'][name] holds the values for
            each configured tag name.
    Returns: None
    """
    # GROUP ALL THE ALERTS TOGETHER SO THAT PEEPS CAN FILTER OUT BY TAG THEIR
    # SPECIFIC ALERTS.  The per-tag value lists are flattened in
    # configured-tag order and paired positionally with the tag names
    # (assumes one value per tag -- TODO confirm upstream guarantees that).
    flat_values = list(itertools.chain.from_iterable(
        result['tags'][name] for name in alert['tags']))
    tag_dict = {name: flat_values[pos]
                for pos, name in enumerate(alert['tags'])}
    tag_dict['alert'] = alert['id']
    # SEND THE METRIC
    send_stat(gaugename, value, tag_dict)
def send_stat(gaugename, value, tag_dict, statprefix='aom'):
    """Submit one gauge value to statsd on the telegraf host."""
    # NOTE(review): a client is created on every call -- presumably
    # cheap, but worth confirming against the statsd client docs.
    statsd_client = StatsClient('telegraf', 8125, statprefix)
    statsd_client.gauge(gaugename, value, tags=tag_dict)
def has_custom_alert_routing(alert_config):
    """Return True when the alert's 'alerts' section defines a routing
    lookup table."""
    alerts_section = alert_config['alerts']
    return 'lookup' in alerts_section
def get_alert_tags(alert_config, query_result):
    """Extract the configured routing-tag values from a query result.

    Prometheus results carry tag values under 'metric'; other (kairosdb)
    results carry them as lists under 'tags'.  Tags absent from the
    result are silently skipped.
    """
    is_prometheus = alert_config.get('query_type') == 'prometheus'
    values = []
    for tag in alert_config['alerts']['lookup']['tags']:
        if is_prometheus and tag in query_result.get('metric', {}):
            values.append(query_result['metric'][tag])
            continue
        kairos_tags = query_result.get('tags', {})
        if tag in kairos_tags and kairos_tags[tag]:
            # kairos tag values are lists; only the first entry is used.
            values.append(kairos_tags[tag][0])
    return tuple(values)

View File

@@ -0,0 +1,123 @@
import unittest
class Mock_Alert_Config():
    """Stub of the alert-config object used by the Alert tests.

    Provides a critical threshold band of [0, 10], no warning
    thresholds, a single required occurrence, and in-memory storage for
    levels and per-tag values.
    """
    def __init__(self):
        # Per-tag persisted values (e.g. occurrence counters).
        self.cache = {}
        # Last level recorded per key.
        self.level = {}
        self.id = "id"

    def set_level(self, k, v):
        self.level[k] = v

    def get_level(self, k):
        # Unknown keys read as None, like the real config.
        return self.level.get(k)

    def init_for_tags(self, *args):
        # No-op stand-in for the real per-tag initialisation.
        pass

    def occurrences(self):
        # Alerts fire after a single breached occurrence.
        return 1

    def get_threshold(self, upper, warning):
        """Return (value, is_set) -- warnings unset, criticals [0, 10]."""
        if warning:
            return None, False
        return (10, True) if upper else (0, True)

    def get_tags(self):
        return "tagsC, tagsD".split(", ")

    def set_for_tags(self, key, value):
        # The original seeded the key with 0 before overwriting it --
        # a dead store; a plain assignment is equivalent.
        self.cache[key] = value

    def get_for_tags(self, key):
        # Missing keys default to 0 and the default is persisted.
        return self.cache.setdefault(key, 0)
class Mock_Result():
    """Fake query result: subscripting with "tags" yields the object
    itself; any other key is echoed straight back."""
    def __init__(self):
        pass

    def __getitem__(self, key):
        return self if key == "tags" else key
class Mock_Logger():
    """Logger stand-in whose level methods all discard their arguments."""
    def __init__(self):
        self.error = self.log
        self.warn = self.log
        self.debug = self.log
        self.info = self.log
        self.warning = self.log

    def log(self, *args):
        pass
class Test_Alert(unittest.TestCase):
    """Exercises alert.Alert tag handling, firing logic, formatting and
    occurrence counting against the mocks defined above."""

    def test_set_tags(self):
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # With no explicit tags the Alert falls back to "instance".
        al = alert.Alert(ac, Mock_Logger(), None, None, -1, 11)
        self.assertEqual(al.get_tags(), "instance")
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertEqual(al.get_tags(), "tagsA, tagsB")
        # Re-tagging is idempotent.
        al.set_tags("a, b, c", res)
        self.assertEqual(al.get_tags(), "a, b, c")
        al.set_tags("a, b, c", res)
        self.assertEqual(al.get_tags(), "a, b, c")

    def test_firing(self):
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # The mock's critical band is [0, 10]: firing whenever either
        # min or max escapes the band.
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 9)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 9)
        self.assertFalse(al.get_firing())

    def test_str(self):
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # Bind to 'al', not 'alert' -- the original shadowed the module
        # import with the instance.
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
        self.assertEqual(al.name(), "Metric: id for tagsA, tagsB")
        self.assertEqual(al.body(), "")

    def test_occurrences(self):
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
        self.assertEqual(False, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        # Not firing: the per-tag counter never advances.
        self.assertEqual(False, al.occurrences_breached)
        self.assertEqual(0, ac.get_for_tags(al.get_tags()))
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 11)
        self.assertEqual(True, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        self.assertEqual(True, al.occurrences_breached)
        self.assertEqual(4, ac.get_for_tags(al.get_tags()))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,33 @@
import unittest
import alert_factory
class Mock_Alert():
    """Records whatever positional arguments it was constructed with."""
    def __init__(self, *args):
        self.args = args
class Mock_Logger():
    """Logger stub that prints every call so test output shows traffic."""
    def __init__(self):
        for name in ("info", "warn", "warning", "error", "debug"):
            setattr(self, name, self.log)

    def log(self, *args, **kwargs):
        print(args, kwargs)
class Test_Alert_Factory(unittest.TestCase):
    """Checks the factory builds instances of the (patched) Alert class."""

    def setUp(self):
        # Swap the real Alert class for the recording mock.
        self.was = alert_factory.Alert
        alert_factory.Alert = Mock_Alert

    def tearDown(self):
        alert_factory.Alert = self.was

    def test(self):
        af = alert_factory.Alert_Factory(None, Mock_Logger())
        built = af.build(0, 5, None, "tagA, tagB", False, "tagC, tagD")
        self.assertTrue(type(built) == Mock_Alert)


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,8 @@
import unittest
class Test_Service(unittest.TestCase):
    """Placeholder suite for the service module."""

    def test(self):
        # Deliberately-failing marker until real service tests exist.
        raise Exception("not impl")


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold(unittest.TestCase):
    """The base Threshold must never be breachable or exceeded."""

    def test(self):
        import threshold
        base = threshold.Threshold(5)
        self.assertFalse(base.can_breach())
        # Neither side of the stored bound counts as exceeded.
        self.assertFalse(base.exceeds(7))
        self.assertFalse(base.exceeds(3))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Lower(unittest.TestCase):
    """A lower threshold breaches on values below it, not above it."""

    def test(self):
        import threshold_lower
        tl = threshold_lower.Threshold_Lower(5)
        # Must CALL can_breach(): the original asserted the bound method
        # object itself, which is always truthy, so the check was a no-op.
        self.assertTrue(tl.can_breach())
        self.assertTrue(tl.exceeds(3))
        self.assertFalse(tl.exceeds(7))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Upper(unittest.TestCase):
    """An upper threshold breaches on values above it, not below it."""

    def test(self):
        import threshold_upper
        tl = threshold_upper.Threshold_Upper(5)
        # Must CALL can_breach(): the original asserted the bound method
        # object itself, which is always truthy, so the check was a no-op.
        self.assertTrue(tl.can_breach())
        self.assertTrue(tl.exceeds(7))
        self.assertFalse(tl.exceeds(3))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,157 @@
import unittest
class Mock_Alert_Config():
    """Stub alert config exposing only critical thresholds (1 and 10)."""
    def __init__(self):
        self.upCrit = 10
        self.lowCrit = 1

    def get_threshold(self, upper, warn):
        # Warning thresholds are unset in this stub.
        if warn:
            return None, False
        value = self.upCrit if upper else self.lowCrit
        return value, True
class Test_Thresholds(unittest.TestCase):
    """Exercises thresholds.Thresholds breach bookkeeping.

    Mock_Alert_Config only defines critical thresholds (lower=1,
    upper=10), so warning-level queries must never report breached.
    """

    def test_breached_both(self):
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        # min below the lower bound AND max above the upper bound.
        t.set_breached(alert_config.lowCrit - 1, alert_config.upCrit + 1)
        should_fire = [
            t.critical_breached(),
            t.lower_breached(),
            t.upper_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),
            t.end_breached(t.UPPER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
            t.get_breached(end=t.UPPER),
        ]
        for i, fired in enumerate(should_fire):
            self.assertTrue(fired, i)
        should_not_fire = [
            t.warning_breached(),
            t.level_breached(t.WARNING),
            t.get_breached(level=t.WARNING),
        ]
        for i, fired in enumerate(should_not_fire):
            self.assertFalse(fired, i)

    def test_breached_lower(self):
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        # Only the lower bound is violated.
        t.set_breached(alert_config.lowCrit - 1, alert_config.upCrit)
        should_fire = [
            t.critical_breached(),
            t.lower_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
        ]
        for i, fired in enumerate(should_fire):
            self.assertTrue(fired, i)
        should_not_fire = [
            t.warning_breached(),
            t.upper_breached(),
            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
        ]
        for i, fired in enumerate(should_not_fire):
            self.assertFalse(fired, i)

    def test_breached_upper(self):
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        # Only the upper bound is violated.
        t.set_breached(alert_config.lowCrit, alert_config.upCrit + 1)
        should_fire = [
            t.critical_breached(),
            t.upper_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.UPPER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.UPPER),
        ]
        for i, fired in enumerate(should_fire):
            self.assertTrue(fired, i)
        should_not_fire = [
            t.warning_breached(),
            t.lower_breached(),
            t.level_breached(t.WARNING),
            t.end_breached(t.LOWER),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.LOWER),
        ]
        for i, fired in enumerate(should_not_fire):
            self.assertFalse(fired, i)

    def test_breached_notset(self):
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        # set_breached never called: nothing may report breached.
        for i, fired in enumerate([
            t.warning_breached(),
            t.critical_breached(),
            t.upper_breached(),
            t.lower_breached(),
            t.level_breached(t.CRITICAL),
            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),
            t.end_breached(t.LOWER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
            t.get_breached(end=t.LOWER),
        ]):
            self.assertFalse(fired, i)

    def test_get_matching(self):
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        self.assertEqual(4, len([i for i in t.get_thresholds_matching()]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.CRITICAL)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.WARNING)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.UPPER)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.LOWER)]))
        # BUG FIX: the level=/end= keyword values were swapped in the
        # original (CRITICAL/WARNING passed as 'end', UPPER/LOWER as
        # 'level'); it only passed because WARNING == UPPER == True and
        # CRITICAL == LOWER == False.
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.UPPER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.UPPER)]))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,19 @@
class Threshold():
    """Base threshold: stores a bound but is inert.

    can_breach() and exceeds() always return False here; subclasses
    give the bound a direction by overriding them.
    """
    def __init__(self, threshold):
        self.threshold = threshold
        self.breached = False

    def can_breach(self):
        # The inert base can never fire.
        return False

    def exceeds(self, value):
        return False

    def set_breached(self, value):
        # Record whether the supplied value violates the bound.
        self.breached = self.exceeds(value)

    def get_breached(self):
        return self.breached

    def get_threshold(self):
        return self.threshold

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Lower(Threshold):
    """Threshold breached when the observed value drops below the bound."""
    def can_breach(self):
        return True

    def exceeds(self, value):
        # Lower bound: a smaller value than the threshold is a breach.
        return value < self.threshold

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Upper(Threshold):
    """Threshold breached when the observed value rises above the bound."""
    def can_breach(self):
        return True

    def exceeds(self, value):
        # Upper bound: a larger value than the threshold is a breach.
        return value > self.threshold

View File

@@ -0,0 +1,67 @@
from threshold_upper import Threshold_Upper
from threshold_lower import Threshold_Lower
from threshold import Threshold
class Thresholds():
    """Holds the four (warning/critical x upper/lower) thresholds for an
    alert and answers breach queries against them.

    Levels and ends are encoded as booleans: WARNING/CRITICAL select the
    severity, UPPER/LOWER the direction.  Thresholds the config does not
    define are represented by the inert Threshold base class.
    """
    WARNING = True
    CRITICAL = False
    UPPER = True
    LOWER = False

    def __init__(self, alert_config):
        self.alert_config = alert_config
        # thresholds[level][end] -> Threshold instance.
        self.thresholds = {}
        for level in [Thresholds.WARNING, Thresholds.CRITICAL]:
            self.thresholds[level] = {}
            for end in [Thresholds.UPPER, Thresholds.LOWER]:
                constructor = Threshold_Upper
                if end == Thresholds.LOWER:
                    constructor = Threshold_Lower
                self.thresholds[level][end] = self.create_threshold(
                    end, level, constructor)

    def create_threshold(self, isUpper, isWarning, constructor):
        """Build one threshold, falling back to the inert base class
        when the config defines no value for this (end, level) pair."""
        value, has = self.alert_config.get_threshold(isUpper, isWarning)
        if not has:
            constructor = Threshold
        return constructor(value)

    def warning_breached(self):
        return self.level_breached(Thresholds.WARNING)

    def critical_breached(self):
        return self.level_breached(Thresholds.CRITICAL)

    def upper_breached(self):
        return self.end_breached(Thresholds.UPPER)

    def lower_breached(self):
        return self.end_breached(Thresholds.LOWER)

    def level_breached(self, level):
        return self.get_breached(level=level)

    def end_breached(self, end):
        return self.get_breached(end=end)

    def can_breach(self):
        """Return True when at least one real (non-inert) threshold exists.

        BUG FIX: the original called get_thresholds_matching() on the
        self.thresholds dict, which has no such method (AttributeError);
        the generator lives on self.
        """
        can_breach = [t for t in self.get_thresholds_matching()
                      if not type(t) is Threshold]
        return len(can_breach) > 0

    def get_breached(self, level=None, end=None):
        """True when any threshold matching the optional filters breached."""
        for threshold in self.get_thresholds_matching(level=level, end=end):
            if threshold.get_breached():
                return True
        return False

    def set_breached(self, min_value, max_value):
        # Lower thresholds are tested against the observed minimum,
        # upper thresholds against the observed maximum.
        for threshold in self.get_thresholds_matching(end=Thresholds.LOWER):
            threshold.set_breached(min_value)
        for threshold in self.get_thresholds_matching(end=Thresholds.UPPER):
            threshold.set_breached(max_value)

    def get_thresholds_matching(self, level=None, end=None):
        """Yield thresholds filtered by level and/or end (None = all)."""
        for l in self.thresholds:
            if level is None or l == level:
                for e in self.thresholds[l]:
                    if end is None or e == end:
                        yield self.thresholds[l][e]