This commit is contained in:
bel
2021-09-12 22:16:11 -06:00
commit ceeb6f0385
129 changed files with 9221 additions and 0 deletions

View File

View File

@@ -0,0 +1,66 @@
class Alert_Config():
    """Wraps a single alert's parsed YAML configuration together with the
    mutable runtime state (per-tag occurrence counters and last alert level)
    that the alerting loop tracks between evaluations.
    """

    def __init__(self, yaml_config):
        """
        Args:
            yaml_config (dict): parsed alert YAML; must contain an 'id' key.
        """
        # Guarantee the alert_tags mapping exists so downstream code never
        # has to guard for its absence.
        if 'alert_tags' not in yaml_config:
            yaml_config['alert_tags'] = {}
        self.id = str(yaml_config['id'])
        self.yaml_config = yaml_config
        # Per-tag occurrence counters (see get_for_tags/set_for_tags).
        # BUG FIX: originally stored as self.tags, which shadowed the
        # tags() accessor below and made it uncallable on instances.
        self.tag_state = {}
        # Last alert level seen per tag key (see get_level/set_level).
        self.state = {}

    def type(self):
        """Return the query backend type; defaults to 'kairos'."""
        if 'type' in self.yaml_config:
            return self.yaml_config['type']
        return 'kairos'

    def tags(self):
        """Return the configured tag name list (empty list when unset)."""
        if 'tags' in self.yaml_config:
            return self.yaml_config['tags']
        return []

    def occurrences(self):
        """Return the occurrences threshold; defaults to 1."""
        if 'occurrences_threshold' in self.yaml_config:
            return self.yaml_config['occurrences_threshold']
        return 1

    def url(self):
        """Return the alert's dashboard URL, falling back to the Grafana
        base URL from the AOM_GRAFANA_URL environment variable plus the id."""
        if 'url' in self.yaml_config:
            return self.yaml_config['url']
        from os import environ
        return environ['AOM_GRAFANA_URL'] + self.id

    def get_level(self, key):
        """Return the last recorded level for *key* (None when unseen)."""
        if key not in self.state:
            self.state[key] = None
        return self.state[key]

    def set_level(self, key, value):
        """Record *value* as the current level for *key*."""
        self.state[key] = value

    def get_for_tags(self, key):
        """Return the occurrence counter for *key*, initialising it to 0."""
        if key not in self.tag_state:
            self.tag_state[key] = 0
        return self.tag_state[key]

    def set_for_tags(self, key, value):
        """Set the occurrence counter for *key* to *value*."""
        if key not in self.tag_state:
            self.tag_state[key] = 0
        self.tag_state[key] = value

    def init_for_tags(self, key):
        """Ensure counters exist for *key* and *key*_count, and reset the
        no-result counter for *key* to 0."""
        # BUG FIX: the original tested/initialised `key` instead of the loop
        # variable `k`, so the *_count counter was never created here.
        for k in [key, key + "_count"]:
            if k not in self.tag_state:
                self.set_for_tags(k, 0)
        self.set_for_tags(key + "_noresult", 0)

    def get_threshold(self, isUpper, isWarning):
        """Return (threshold-or-None, configured?) for the requested band.

        BUG FIX: the original definition was missing the `self` parameter,
        which made every instance call raise TypeError.
        """
        key = {
            (True, True): 'warning_upper_threshold',
            (True, False): 'critical_upper_threshold',
            (False, True): 'warning_lower_threshold',
            (False, False): 'critical_lower_threshold',
        }[(bool(isUpper), bool(isWarning))]
        return self.try_get_yaml_config(key)

    def try_get_yaml_config(self, key):
        """Return a (value-or-None, key-present) tuple for *key*."""
        return self.yaml_config[key] if key in self.yaml_config else None, key in self.yaml_config

View File

@@ -0,0 +1,36 @@
from alert_config import Alert_Config
class Alert_Config_List():
    """Dictionary-like collection of Alert_Config objects keyed by alert id,
    with a compare() helper used to diff two generations of loaded configs.
    """

    def __init__(self, alert_configs=None):
        """Optionally seed the collection from a config, a list of configs,
        or another Alert_Config_List."""
        self.hash = {}
        if alert_configs:
            self.add(alert_configs)

    def __getitem__(self, k):
        return self.hash[k]

    def __len__(self):
        return len(self.hash)

    def add(self, alert_config):
        """Add a single Alert_Config, each item of a list, or merge another
        Alert_Config_List.  Raises for any other type."""
        if isinstance(alert_config, Alert_Config):
            self.hash[alert_config.id] = alert_config
        elif isinstance(alert_config, list):
            for a in alert_config:
                self.add(a)
        elif isinstance(alert_config, Alert_Config_List):
            for k in alert_config.hash:
                self.add(alert_config.hash[k])
        else:
            raise Exception("unexpected type added to Alert_Config_List")

    def compare(self, other):
        """Diff this collection against *other* (None/falsy means empty).

        Returns:
            (added, removed, modified): sets of alert ids present only in
            *other*, only in self, or in both but with different YAML.
        """
        if not other:
            other = Alert_Config_List()
        self_keys = self.hash.keys()
        other_keys = other.hash.keys()
        added = other_keys - self_keys
        removed = self_keys - other_keys
        intersection = self_keys & other_keys
        # BUG FIX: Alert_Config defines no __eq__, so the original
        # `self[i] != other[i]` compared object identity and flagged every
        # shared id as modified on each reload (needlessly restarting all
        # jobs).  Compare the underlying YAML documents instead.
        modified = [i for i in intersection
                    if self[i].yaml_config != other[i].yaml_config]
        return set(added), set(removed), set(modified)

163
AoM_Service/library/args.py Executable file
View File

@@ -0,0 +1,163 @@
# Contains the arg parser options.
"""Contains the arg parser options."""
import argparse
import sys
def get_builder_args():
    """
    Gets the arguments passed in to the aom_builder main call
    :return: dict of parsed arguments
    """
    parser = argparse.ArgumentParser(
        description="Generates a valid yaml file "
                    "for alerting on metrics. If you are "
                    "familiar with the yaml structure for an "
                    "alert you don't have to use this builder,"
                    " it's just convenient")
    # FIX: "Kariosdb" -> "Kairosdb".
    parser.add_argument('-q', '--query',
                        help="The Kairosdb query string to use")
    # FIX: restored the words dropped from the original help text
    # ("the check will This value is in seconds").
    parser.add_argument('-i', '--interval', type=int, default=60,
                        help="The interval that the check will run at. "
                             "This value is in seconds")
    # FIX: "cause an depending on" -> "cause an alert depending on".
    parser.add_argument('-t', '--threshold', '--upperthreshold',
                        help="The upper threshold is the value that when "
                             "reached will cause an alert depending on the "
                             "threshold logic. Use in conjunction with lower "
                             "threshold to define a normal band.")
    parser.add_argument('-b', '--lowerthreshold',
                        help="The lower threshold is the value that when "
                             "reached will cause an alert depending on the "
                             "threshold logic. Use in conjunction with upper "
                             "threshold to define a normal band.")
    parser.add_argument('-m', '--measure', choices=['gt', 'lt', 'eq'],
                        help="The measure to use to compare the "
                             "threshold to the values of the alerts")
    parser.add_argument('-a', '--alert_config',
                        help='A valid Yaml representation of your alerting '
                             'block')
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_builder run. "
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-p', '--port', type=int, default=8080,
                        help="The port to run the webapp on")
    return args_to_dict(parser)
def get_tester_service_args():
    """
    Gets arguments passed into aom_tester.py
    Returns: dict of parsed arguments
    """
    parser = argparse.ArgumentParser(
        description="Parameters to start the alerting on metrics dummy tester "
                    "service")
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_service app"
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-a', '--alert_configs', default=None,
                        help="If provided will override the folder location read from the "
                             "config with the value passed in. Is helpful for testing and "
                             "troubleshooting alerts")
    parser.add_argument('--hostname',
                        help="If provided, will override the actual hostname check with this "
                             "value")
    parser.add_argument('-p', '--port', type=int, default=8080,
                        help="The port to run the webapp on")
    return args_to_dict(parser)
def get_service_args():
    """
    Gets arguments passed into aom_service.py
    Returns: dict of parsed arguments
    """
    parser = argparse.ArgumentParser(
        description="Parameters to start the alerting on metrics service")
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_service app"
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-a', '--alert_configs', default=None,
                        help="If provided will override the folder location read from the "
                             "config with the value passed in. Is helpful for testing and "
                             "troubleshooting alerts")
    parser.add_argument('--alert_routing_lookup', default=None,
                        help="If provided will override the folder used to fetch the alerts "
                             "lookup configuration.")
    parser.add_argument('-o', '--override', action='store_true',
                        help="Overrides the check leader election value")
    parser.add_argument('--hostname',
                        help="If provided, will override the actual hostname check with this "
                             "value")
    parser.add_argument('-p', '--port', type=int, default=8080,
                        help="The port to run the webapp on")
    return args_to_dict(parser)
def args_to_dict(parsed_args):
    """
    Converts the argument parser object to a dict
    Args:
        parsed_args: a configured argparse.ArgumentParser
    Returns:
        Dictionary of parsed argument name -> value
    """
    try:
        # vars() already exposes the namespace's attribute dict; copy it so
        # callers can mutate the result without touching the namespace.
        return dict(vars(parsed_args.parse_args()))
    except argparse.ArgumentError:
        # NOTE(review): argparse normally reports bad command lines by
        # raising SystemExit, not ArgumentError, so this branch is rarely
        # reached — confirm the intended error handling.
        parsed_args.print_help()
        sys.exit(1)

226
AoM_Service/library/config.py Executable file
View File

@@ -0,0 +1,226 @@
# config.py
"""Functions for loading alert configuration files"""
import glob
import os
import json
import hashlib
import yaml
import requests
from serviceapp import service
# import logging
# logger = logging.getLogger(__name__)
def md5(fname):
    """Return the hex md5 digest of the file at *fname*, read in 4 KiB chunks."""
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        while True:
            chunk = handle.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def get_healthy_nodes_and_index(consul_url, hostname, logger):
    """Find AOM healthy nodes on consul.

    Queries the Consul catalog for all alert-on-metrics nodes, keeps those
    whose AOM health check is passing, and locates *hostname* in the sorted
    healthy list.

    Args:
        consul_url (string): base URL of the Consul agent.
        hostname (string): this host's name.
        logger: logger for error reporting.
    Returns:
        (host_index, healthy_count): index of this host among the sorted
        healthy nodes (-1 when unhealthy or on error) and how many healthy
        nodes exist.
    """
    # BUG FIX: initialise before the try block — previously a timeout on the
    # first request left both names undefined and the return raised NameError.
    host_index = -1
    healthy_nodes = []
    try:
        # getting all registered nodes from consul
        r = requests.get(
            consul_url +
            '/v1/catalog/service/alert-on-metrics',
            timeout=60)
        assert r.status_code == 200, "Failed to get back a 200 from consul catalog"
        value = json.loads(r.text)
        node_list = [elem.get('Node') for elem in value]
        # Retrieving healthy nodes
        for node in node_list:
            r2 = requests.get(
                consul_url +
                '/v1/health/node/' +
                node,
                timeout=60)
            # BUG FIX: the original asserted on r (the catalog response)
            # instead of r2 here.
            assert r2.status_code == 200, "Failed to get back a 200 from consul health"
            healthcheck_list = json.loads(r2.text)
            for check in healthcheck_list:
                if (check.get('CheckID') == 'check_healthcheck_alert-on-metrics_alert-on-metrics' and
                        check.get('Status') == 'passing'):
                    healthy_nodes.append(node)
        try:
            healthy_nodes.sort()
            host_index = healthy_nodes.index(hostname)
        except ValueError:
            logger.error("Host is not healthy")
    except TimeoutError:
        # NOTE(review): requests raises requests.exceptions.Timeout rather
        # than the builtin TimeoutError — confirm which exception is expected
        # here.
        logger.error("Timed out connecting to Consul")
    return host_index, len(healthy_nodes)
def distribute_configs(filename, host_index, module, logger):
    """Uses md5 of alert config to split the files among healthy servers.

    Returns True only when this host (host_index of *module* healthy nodes)
    owns the shard that *filename*'s md5 hashes into.
    """
    # Guard clauses: no healthy nodes, or this host itself is unhealthy.
    if module == 0:
        logger.error("No healthy nodes for the service")
        return False
    if host_index == -1:
        logger.error("Host is unhealthy")
        return False
    return int(md5(filename), 16) % module == host_index
def is_valid(alert_config, logger):
    """Checks if alert has all required fields.

    Validates a parsed alert YAML dict: presence of alerts/query/id, a
    minimum interval of 30, query shape per backend, at least one threshold,
    sane warning-vs-critical ordering, and (when present) a complete lookup
    routing section.  Any failed assertion — or unexpected error such as a
    missing key, which the broad except also catches — is logged and the
    config is reported invalid.

    Args:
        alert_config (dict): parsed alert YAML document.
        logger: logger used to report why a config was rejected.
    Returns:
        bool: True when the config passes every check.
    """
    try:
        assert alert_config['alerts'], "No Alerts configured, this is a dead config"
        assert alert_config['query'], "No Query, this is a dead config"
        assert alert_config['interval'] >= 30, "Intervals less than 30 are invalid"
        assert alert_config['id'], "Alert ID is empty, this is a dead config"
        if alert_config.get('query_type') == 'prometheus':
            # Prometheus queries are plain PromQL strings.
            assert isinstance(
                alert_config['query'], str), "Invalid Prometheus query"
        else:
            # The default backend (KairosDB) takes a JSON-like dict query.
            assert isinstance(
                alert_config['query'], dict), "Kairosdb Query string cannot be validated as proper JSON"
            # Tags defined directly on the query, plus a few always-present
            # ones ('', dc, fqdn).
            defined_tags = set(alert_config['query']['metrics'][0]['tags'].keys()).union(
                {'', 'dc', 'fqdn'})
            # IF THERE IS AGGREGATION WE HAVE TO ADD THESE TAGS
            if 'group_by' in alert_config['query']['metrics'][0]:
                defined_tags.update(
                    set(alert_config['query']['metrics'][0]['group_by'][0]['tags']))
            # NOTE(review): defined_tags is only consumed by the
            # commented-out warning loop below, so it is currently unused.
            # for undefined_tag in set(alert_config['tags']).difference(defined_tags):
            #     print("WARNING! {} tag is not defined on the query. Please make sure it does exist to "\
            #         "prevent empty results".format(undefined_tag))
        # OUR MINIMUM THRESHOLD NEED
        assert 'critical_lower_threshold' in alert_config or 'critical_upper_threshold' in alert_config or \
            'warning_lower_threshold' in alert_config or 'warning_upper_threshold' in alert_config, \
            "Config must have at least one threshold set."
        # JUST MAKE SURE YOU ARE NOT DOING SOMETHING STUPID WITH WARNING COMING
        # AFTER CRITICAL
        if 'warning_lower_threshold' in alert_config and 'critical_lower_threshold' in alert_config:
            assert alert_config['critical_lower_threshold'] < alert_config['warning_lower_threshold'], \
                "Lower Critical must be less than Lower Warning"
        if 'warning_upper_threshold' in alert_config and 'critical_upper_threshold' in alert_config:
            assert alert_config['critical_upper_threshold'] > alert_config['warning_upper_threshold'], \
                "Upper Critical must be greater than Upper Warning"
        # Optional per-tag alert routing: must have a default, a source for
        # the lookup table (inline or file), and string tag names.
        if 'lookup' in alert_config['alerts']:
            assert 'default' in alert_config['alerts']['lookup'], 'No default alert configured for the lookup configuration'
            assert 'lookup_file' in alert_config['alerts']['lookup'] or 'lookups' in alert_config['alerts'][
                'lookup'], 'No lookup configured either in the alert configuration or in a separated file'
            assert 'tags' in alert_config['alerts']['lookup'], 'No tags configured for the lookup configuration'
            assert all(
                isinstance(
                    tag, str) for tag in alert_config['alerts']['lookup']['tags']), 'Tags must be valid string'
        # if 'occurrences_threshold' in alert_config:
        #     assert alert_config['occurrences_threshold'] >= 1, \
        #         "Having an occurrences value less than 2 is assumed and pointless to specify"
    except Exception as e:
        logger.warning("Invalid config file: {}".format(str(e)))
        return False
    return True
def is_valid_alert_routing_lookup(alert_routing_lookup, alert, logger):
    """Check if routing lookup is properly configured.

    Every routing entry must carry an 'alert' and a 'tags' mapping whose
    string keys are all declared in the alert's alerts.lookup.tags list.

    Args:
        alert_routing_lookup (list[dict]): parsed routing entries.
        alert (dict): the alert config the routing belongs to.
        logger: logger used to report validation failures.
    Returns:
        bool: True when the routing configuration is valid.
    """
    try:
        assert alert_routing_lookup, "No lookup values configured, the configuration is empty."
        for alert_routing in alert_routing_lookup:
            assert 'alert' in alert_routing, "No alert defined for this configuration."
            assert 'tags' in alert_routing, "No tags value defined for this configuration."
            for tag in alert_routing['tags']:
                assert tag in alert['alerts']['lookup']['tags'], "The tag {} is not part of the configuration".format(
                    tag)
            assert all(isinstance(tag, str)
                       for tag in alert_routing['tags']), "Tags must be valid string"
    except (AssertionError, KeyError, TypeError) as e:
        # KeyError/TypeError cover malformed entries (e.g. a non-dict entry
        # or a missing nested key), which is_valid() handles the same way
        # via its broad except.
        logger.warning("Invalid alert routing config file: {}".format(str(e)))
        return False
    return True
# noinspection PyBroadException
def glob_the_configs(
        config_path,
        lookup_config_path,
        consul_url,
        hostname,
        logger):
    """
    Load, validate and shard the alert config files owned by this host.

    Args:
        config_path (string): relative path to the configs
        lookup_config_path (string): folder holding lookup routing files
        consul_url (string): url to consul service
        hostname (string): this host's name, used for sharding
        logger: logger for progress and error reporting
    Returns:
        List of configs (each a parsed YAML dict; configs with a lookup
        section gain an 'alert_routing_lookup' tag-tuple -> alert mapping)
    """
    invalid_configs = 0
    alert_list = []
    # Determine this host's position among the healthy AOM nodes so the
    # config files can be split across them.
    host_index, module = get_healthy_nodes_and_index(
        consul_url, hostname, logger)
    for config_file in glob.glob(config_path + "/**/*.yaml", recursive=True):
        logger.debug("Found {} config".format(config_file))
        # LOAD CONFIG
        # Only process files whose md5 maps onto this host's shard.
        if distribute_configs(
                config_file,
                host_index,
                module,
                logger):
            try:
                # NOTE(review): files are opened without being closed here
                # (and again for the lookup file below) — consider `with`.
                alert = yaml.safe_load(open(config_file, 'rb').read())
                if is_valid(alert, logger):
                    if 'lookup' in alert['alerts']:
                        alert_routing_lookup = []
                        is_valid_lookup = True
                        # The routing table may live in a separate file or
                        # inline under alerts.lookup.lookups.
                        if 'lookup_file' in alert['alerts']['lookup']:
                            lookup_path = "{}/{}".format(
                                lookup_config_path, alert['alerts']['lookup']['lookup_file'])
                            if os.path.isfile(lookup_path):
                                alert_routing_lookup = yaml.safe_load(
                                    open(lookup_path, 'rb').read())
                            else:
                                is_valid_lookup = False
                        else:
                            alert_routing_lookup = alert['alerts']['lookup']['lookups']
                        is_valid_lookup = is_valid_lookup and is_valid_alert_routing_lookup(
                            alert_routing_lookup, alert, logger)
                        if is_valid_lookup:
                            # Index each routing entry by the tuple of its
                            # tag values, in the configured tag order.
                            alerts_per_tags = {}
                            for alert_configuration in alert_routing_lookup:
                                key = []
                                for tag in alert['alerts']['lookup']['tags']:
                                    key.append(
                                        alert_configuration['tags'].get(tag))
                                alerts_per_tags[tuple(
                                    key)] = alert_configuration['alert']
                            alert['alert_routing_lookup'] = alerts_per_tags
                        else:
                            invalid_configs += 1
                            continue
                    alert_list.append(alert)
                else:
                    invalid_configs += 1
            except BaseException as e:
                # Broad catch so one unparseable file cannot stop loading
                # of the remaining configs.
                logger.error("Error parsing {} config: {}".format(config_file, e))
    logger.info("Invalid configs: {}".format(invalid_configs))
    service.send_stat(
        'invalid_configs',
        invalid_configs,
        dict(),
        statprefix='aom')
    logger.info("Loaded {} configs".format(len(alert_list)))
    return alert_list

10
AoM_Service/library/job.py Executable file
View File

@@ -0,0 +1,10 @@
import subprocess
class Job():
    """Pairs an alert id with its worker process and knows how to stop it."""

    def __init__(self, id, p):
        self.id = id
        self.p = p

    def kill(self):
        """Force-kill the worker with SIGKILL via /bin/kill, then reap it."""
        pid = self.p.pid
        subprocess.call(["/bin/kill", "-9", "{}".format(pid)])
        self.p.join()

29
AoM_Service/library/job_list.py Executable file
View File

@@ -0,0 +1,29 @@
from job import Job
class Job_List():
    """Mapping of job id -> Job with helpers for bulk add and kill."""

    def __init__(self):
        self.jobs = {}

    def __getitem__(self, k):
        return self.jobs[k]

    def __setitem__(self, k, v):
        self.jobs[k] = v

    def __len__(self):
        return len(self.jobs)

    def add(self, job):
        """Add a single Job, or merge every job from another Job_List;
        reject anything else."""
        if isinstance(job, Job):
            self[job.id] = job
        elif isinstance(job, Job_List):
            for key in job.jobs:
                self.add(job[key])
        else:
            raise Exception("unexpected type added to Job_List")

    def kill(self, id):
        """Kill and forget the job with this id; no-op when unknown."""
        if id not in self.jobs:
            return
        self[id].kill()
        del self.jobs[id]

122
AoM_Service/library/logger.py Executable file
View File

@@ -0,0 +1,122 @@
# logger.py
""" Logging configuration """
import logging
import logging.handlers
import os
# Quiet noisy third-party loggers: only ERROR and above from the HTTP stack
# and the werkzeug dev server reach our handlers.
logging.getLogger('requests').setLevel(logging.ERROR)
logging.getLogger('urllib3').setLevel(logging.ERROR)
logging.getLogger('werkzeug').setLevel(logging.ERROR)
class SingleLevelFilter(logging.Filter):
    """Log filter that passes records of exactly one level — or, when
    *reject* is set, everything except that level."""

    def __init__(self, passlevel, reject):
        """
        Args:
            passlevel (int): the numeric log level to match.
            reject (bool): when True, invert the match (drop only records
                at *passlevel* and pass all others).
        """
        self.passlevel = passlevel
        self.reject = reject

    def filter(self, record):
        """Return True when *record* should be emitted.

        With reject=False only records whose levelno equals passlevel pass;
        with reject=True only records at other levels pass.  This lets a
        DEBUG-only handler coexist with a general handler without duplicates.
        """
        matches = record.levelno == self.passlevel
        return not matches if self.reject else matches
class AlertLogging(logging.Logger):
    """
    Class Object to handle the logging of the alert on metrics service.
    Starts with an INFO stream handler (start()) and can flip on (and later
    remove) an additional log-file handler and a DEBUG stream handler as
    needed.
    """
    def __init__(self, name):
        """
        Initialise the formatters and the underlying Logger.

        Args:
            name: logger name passed through to logging.Logger.
        """
        self.name = name
        # Verbose format used for the file handler and the DEBUG handler.
        self.debug_formatter = logging.Formatter(
            "%(asctime)s - [%(levelname)s] - [%(module)s:%(lineno)d] - "
            "%(message)s", "%m-%d %H:%M:%S")
        # Compact format used for the normal INFO stream handler.
        self.standard_formatter = logging.Formatter(
            "%(asctime)s - [%(levelname)s] - %(message)s", "%m-%d %H:%M:%S")
        # NOTE(review): this bare getLogger() call discards its result and
        # has no visible effect — confirm whether root-logger setup was
        # intended here.
        logging.getLogger()
        logging.Logger.__init__(self, name, logging.DEBUG)
        # NOTE(review): registering the logger class inside __init__ only
        # takes effect after the first instance exists; this is usually done
        # at module level — confirm intent.
        logging.setLoggerClass(AlertLogging)

    def start(self):
        """
        Attach an INFO-level stream handler using the standard format.
        Returns:
            self, so construction and start() can be chained.
        """
        info_handler = logging.StreamHandler()
        info_handler.setLevel(logging.INFO)
        info_handler.setFormatter(self.standard_formatter)
        self.addHandler(info_handler)
        return self

    def start_log_file(self, file_path, mode='a'):
        """
        Creates a separate log file handler
        Args:
            file_path: path to the log file
            mode: the type of mode to open the file handler with
        Returns:
            None
        """
        self.log_path = file_path
        work_folder = os.path.dirname(file_path)
        # Create the log directory on first use.
        if work_folder and not os.path.exists(work_folder):
            os.makedirs(work_folder)
        self.log_handler = logging.FileHandler(file_path, mode)
        # NOTE(review): the file handler is set to WARNING yet uses the
        # debug formatter — confirm the intended file log level.
        self.log_handler.setLevel(logging.WARNING)
        self.log_handler.setFormatter(self.debug_formatter)
        self.addHandler(self.log_handler)

    def stop_log_file(self):
        """
        Closes the log file and sets the handler to None.
        Raises AttributeError when start_log_file() was never called.
        Returns:
            None
        """
        self.log_handler.close()
        self.removeHandler(self.log_handler)
        self.log_handler = None

    def start_debug(self):
        """
        Attach a DEBUG-only stream handler.  The SingleLevelFilter keeps
        higher levels out so they are not duplicated with the INFO handler.
        Returns:
            None
        """
        self.debug_handler = logging.StreamHandler()
        self.debug_handler.setLevel(logging.DEBUG)
        self.debug_handler.addFilter(SingleLevelFilter(logging.DEBUG, False))
        self.debug_handler.setFormatter(self.debug_formatter)
        self.addHandler(self.debug_handler)

    def stop_debug(self):
        """
        Detach the DEBUG handler added by start_debug().
        Returns:
            None
        """
        self.removeHandler(self.debug_handler)
        self.debug_handler = None

14
AoM_Service/library/process.py Executable file
View File

@@ -0,0 +1,14 @@
import multiprocessing
class Process(multiprocessing.Process):
    """Base worker process for a single alert config.

    Subclasses supply the check function via get_target(); the constructor
    wires it up as the multiprocessing target with the standard argument
    tuple and names the daemon process after the alert id.
    """

    def __init__(self, alert_config, config, logger, production_mode):
        multiprocessing.Process.__init__(
            self,
            target=self.get_target(),
            args=(alert_config, config, logger, production_mode),
            name=alert_config.id,
            daemon=True,
        )

    def get_target(self):
        """Return the callable to run in the child process.

        Must be overridden by subclasses.  Raises NotImplementedError (a
        subclass of Exception, so existing broad handlers still catch it)
        rather than a bare Exception, making the abstract intent explicit.
        """
        raise NotImplementedError("abstract method not implemented")

View File

@@ -0,0 +1,14 @@
import process_prometheus
import process_kairos
class Process_Factory():
    """Builds the right worker Process subclass for an alert config's
    query backend."""

    def __init__(self, config, logger, production):
        self.config = config
        self.logger = logger
        self.production = production

    def build(self, alert_config):
        """Return a Process_Prometheus for prometheus-typed alerts,
        otherwise a Process_Kairos (the default backend)."""
        if alert_config.type() == "prometheus":
            cls = process_prometheus.Process_Prometheus
        else:
            cls = process_kairos.Process_Kairos
        return cls(alert_config, self.config, self.logger, self.production)

View File

@@ -0,0 +1,6 @@
import process
from serviceapp import service
class Process_Kairos(process.Process) :
    """Worker process that runs the KairosDB alert check loop."""
    def get_target(self) :
        # Entry point executed in the child process (see process.Process).
        return service.check_kairosdb_alert

View File

@@ -0,0 +1,6 @@
import process
from serviceapp import service
class Process_Prometheus(process.Process) :
    """Worker process that runs the Prometheus alert check loop."""
    def get_target(self) :
        # Entry point executed in the child process (see process.Process).
        return service.check_prometheus_alert

80
AoM_Service/library/service.py Executable file
View File

@@ -0,0 +1,80 @@
import os
from alert_config_list import Alert_Config_List
from alert_config import Alert_Config
from job_list import Job_List
from job import Job
from process_factory import Process_Factory
from time import sleep
from config import glob_the_configs
from serviceapp import service
class Service():
    """Main reload loop: periodically re-reads alert configs, diffs them
    against the running set, and starts/stops worker processes to match."""

    def __init__(self, logger, reload_interval, hostname, config):
        """
        Args:
            logger: application logger.
            reload_interval (int): seconds between config reload passes.
            hostname: override for this host's name; defaults to $HOSTNAME.
            config (dict): service configuration (alert folder paths, etc.).
        """
        self.alert_config_list = Alert_Config_List()
        self.job_list = Job_List()
        self.logger = logger
        self.info = self.logger.info
        self.error = self.logger.error
        self.reload_interval = reload_interval
        self.box_hostname = os.environ['HOSTNAME'] if hostname is None else hostname
        # Production unless a TEST environment variable is present.
        self.production = "TEST" not in os.environ
        self.config = config

    def start(self):
        """Run the reload loop until is_running() goes false, then kill all
        remaining jobs by diffing against an empty config list."""
        self.info("Waiting 15s for Consul service to pass")
        sleep(15)
        while self.is_running():
            new_alert_config_list = self.get_new_alert_config_list()
            self.purge_stale(new_alert_config_list)
            self.create_upserted(new_alert_config_list)
            self.alert_config_list = new_alert_config_list
            total_jobs = len(self.job_list)
            self.info("Total running jobs: {}".format(total_jobs))
            service.send_stat('total_jobs', total_jobs, dict(), statprefix='aom')
            sleep(self.reload_interval)
        self.info("Exiting alerts")
        self.purge_stale(Alert_Config_List())

    def is_running(self):
        """Hook for tests/shutdown; the live service always keeps running."""
        return True

    def get_new_alert_config_list(self):
        """Load and parse the alert config files into an Alert_Config_List.

        Returns an empty Alert_Config_List on failure.  BUG FIX: the
        original returned a plain [] here, which was then assigned to
        self.alert_config_list and crashed the next loop iteration when
        .compare() was called on it.
        """
        try:
            yaml_configs = self.parse_alert_config_files()
            alert_configs = [Alert_Config(i) for i in yaml_configs]
            return Alert_Config_List(alert_configs)
        except Exception as e:
            self.error("Failed to load config files: {}".format(e))
            return Alert_Config_List()

    def parse_alert_config_files(self):
        """Glob and validate the on-disk alert configs owned by this host."""
        path = self.config['alert_folder']
        routing = self.config['alert_routing_config']
        consul = 'http://consul.service.consul:8500'
        return glob_the_configs(path, routing, consul, self.box_hostname, self.logger)

    def purge_stale(self, new_alert_config_list):
        """Kill jobs whose configs were removed or modified."""
        _, removed_ids, modified_ids = self.alert_config_list.compare(new_alert_config_list)
        stale_ids = removed_ids.union(modified_ids)
        for stale_id in stale_ids:
            self.job_list.kill(stale_id)
        service.send_stat('removed_jobs', len(removed_ids), dict(), statprefix='aom')
        self.info("Removed alert_configs: {}".format(removed_ids))

    def create_upserted(self, new_alert_config_list):
        """Spawn jobs for configs that are new or were modified."""
        added_ids, _, modified_ids = self.alert_config_list.compare(new_alert_config_list)
        upserted_ids = added_ids.union(modified_ids)
        for id in upserted_ids:
            p = self.spawn_process(new_alert_config_list[id])
            j = Job(id, p)
            self.job_list.add(j)
        service.send_stat('new_jobs', len(added_ids), dict(), statprefix='aom')
        service.send_stat('modified_jobs', len(modified_ids), dict(), statprefix='aom')
        self.info("Added alert_configs: {}".format(added_ids))
        # BUG FIX: the original logged added_ids on this line as well.
        self.info("Modified alert_configs: {}".format(modified_ids))

    def spawn_process(self, alert_config):
        """Build and start the worker process for one alert config."""
        process_factory = Process_Factory(self.config, self.logger, self.production)
        process = process_factory.build(alert_config)
        process.start()
        return process

View File

View File

@@ -0,0 +1,189 @@
from thresholds import Thresholds
class Alert() :
    """Evaluates one query result for an alert config: computes threshold
    breaches, per-tag occurrence counts and the alert level, and renders the
    firing / recovery message bodies.

    NOTE(review): this class appears to be mid-refactor — several names it
    references are not defined anywhere visible (flagged inline), so some
    code paths cannot run as written.
    """
    def __init__(self, alert_config, logger, tags, result, min_value, max_value) :
        # True once the occurrence count for the current tag reaches the
        # configured occurrences() threshold.
        self.occurrences_breached = False
        # True when the computed level differs from the previously stored one.
        self.new_level_breached = False
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.error = logger.error
        self.alert_config = alert_config
        self.thresholds = Thresholds(alert_config)
        # Comma-joined tag values identifying this result (see set_tags()).
        self.tags = ""
        self.result = result
        self.set_tags(tags)
        # NOTE(review): Alert_Config exposes tags(), not get_tags() — this
        # call looks like it targets a method that does not exist; confirm.
        self.alert_config.init_for_tags(alert_config.get_tags())
        self.set_firing(min_value, max_value)
        # NOTE(review): `availability` is neither a parameter of __init__
        # nor defined in this scope — this raises NameError as written.  The
        # factory accepts an availability argument it never forwards here.
        if availability :
            self.info("Sending availability stat 1")
            self.send_metrics(self.name(), 0 if self.level() == "CRITICAL" else 1, self.result, 'service_level')
    def name(self) :
        """Human-readable alert identity: metric id plus tag values."""
        return "Metric: {} for {}".format(self.alert_config.id, self.get_tags())
    def body(self) :
        """Render the message body for the current state and emit the level
        metric.

        NOTE(review): neither `md5` nor `tag` is defined in this scope, so
        the return statement raises NameError — the TODO above it suggests
        this is unfinished.
        """
        body = ""
        if not self.get_firing() :
            body = self.get_not_firing_body()
        else :
            body = self.get_is_firing_body()
        self.debug("Alert {}->[{}]->{}, Occurrences={} of {}".format(
            self.name(),
            self.get_tags(),
            self.level(),
            self.get_occurrences(),
            self.alert_config.occurrences(),
        ))
        self.send_metrics(self.name(), self.level_code(), self.level())
        # TODO
        return body, md5(tag.encode('utf-8')).hexdigest()[:10]
    def level(self) :
        """Return 'RECOVERY', 'CRITICAL' or 'WARNING'.

        NOTE(review): implicitly returns None when firing but no threshold
        matches either level — confirm that is intended.
        """
        if not self.get_firing() :
            return "RECOVERY"
        if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.CRITICAL)] :
            return "CRITICAL"
        if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.WARNING)] :
            return "WARNING"
    def level_code(self) :
        """Numeric code for the current level.

        NOTE(review): every branch returns 0 — WARNING/CRITICAL presumably
        should map to distinct codes; this looks like a placeholder.
        """
        level = self.level()
        if level == "RECOVERY" :
            return 0
        elif level == "WARNING" :
            return 0
        elif level == "CRITICAL" :
            return 0
    def get_not_firing_body(self) :
        """Build the recovery message body.

        NOTE(review): the two helper calls below are missing `self.` and
        `force` is undefined — this method raises NameError as written.
        """
        body = ""
        body += get_not_firing_body_threshold()
        body += get_not_firing_body_occurrences()
        if not body :
            self.alert_config.set_for_tags(self.get_tags()+"_count", force)
            return ""
        return "GOOD: " + body
    def get_not_firing_body_threshold(self) :
        """Describe which configured bounds the value is now within,
        preferring warning thresholds and falling back to critical ones."""
        if self.result is None :
            return ""
        body = ""
        v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=True)
        if not ok :
            v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=False)
        if ok :
            body += self.form("<", v)
        v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=True)
        if not ok :
            v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=False)
        if ok :
            body += self.form(">", v)
        return body
    def get_not_firing_body_occurrences(self) :
        """Recovery bookkeeping: emit a recovery metric when we have a
        result, or a manual-validation notice when the query returned
        nothing, then reset the occurrence counter."""
        if not self.get_occurrences() :
            return ""
        body = ""
        if not self.result is None :
            self.send_metrics(self.name(), 1, self.level())
        else :
            body += "{} RECOVERY due to no results found from query. Recommend you manually validate recovery\n{}".format(self.name(), self.alert_config.url())
        # NOTE(review): set_occurrences() tests `if force:` and 0 is falsy,
        # so this reset is a no-op — confirm `force is not None` was meant.
        self.set_occurrences(force=0)
        return body
    def get_is_firing_body(self) :
        """Build the firing message body for breached thresholds.

        NOTE(review): `self.upper_firing` and `self.value` are never
        assigned anywhere in this class, and the lower-threshold branch also
        reads upper_firing — this method cannot run as written.  The final
        `return ""` when occurrences are breached also looks inverted
        (the debug text says "occurred X time(s) < threshold") — confirm.
        """
        body = ""
        if self.thresholds.get_breached(level=Thresholds.UPPER) :
            body += self.form(">", self.upper_firing)
        if self.thresholds.get_breached(level=Thresholds.LOWER) :
            body += self.form("<", self.upper_firing)
        if self.occurrences_breached :
            self.debug("Value {} of {} for tag {} has occurred {} time(s) < threshold of {}".format(
                self.value,
                self.name(),
                self.get_tags(),
                self.get_occurrences(),
                self.alert_config.occurrences(),
            ))
            return ""
        return body
    def form(self, operator, static) :
        """One message line: name, value vs threshold, dashboard URL.
        NOTE(review): relies on self.value, which is never set."""
        return "{}\n{:.2f} {}= {}\n{}".format(
            self.name(),
            self.value,
            operator,
            static,
            self.alert_config.url(),
        )
    def set_tags(self, tags) :
        """Set self.tags from an explicit value, else derive it from the
        result's tag values (sorted, comma-joined); default to 'instance'."""
        if tags :
            self.tags = tags
        elif self.result :
            import itertools
            result_tags = [ self.result['tags'][x] for x in self.alert_config.get_tags() ]
            # NOTE(review): chain(result_tags) does not flatten the per-tag
            # value lists — likely meant chain(*result_tags); confirm.
            chain = itertools.chain(result_tags)
            sorted_list = sorted(list(chain))
            self.tags = ", ".join(sorted_list)
        if not self.tags :
            self.tags = "instance"
    def get_tags(self) :
        """Return the comma-joined tag values set by set_tags()."""
        return self.tags
    def set_firing(self, min_value, max_value) :
        """Recompute threshold breaches for the given min/max values and
        update occurrence and level state, emitting metrics."""
        self.thresholds = Thresholds(self.alert_config)
        self.thresholds.set_breached(min_value, max_value)
        self.set_occurrences()
        self.set_new_level_breached()
        self.send_metrics()
        self.send_threshold_metrics()
    def get_firing(self) :
        """Firing means a threshold is breached AND the occurrence count
        has reached the configured minimum."""
        return self.thresholds.get_breached() and self.occurrences_breached
    def get_occurrences(self) :
        """Return the stored occurrence counter for the current tag key."""
        tags = self.get_tags()
        return self.alert_config.get_for_tags(tags)
    def set_occurrences(self, force=None) :
        """Increment the per-tag occurrence counter when breached and update
        occurrences_breached; a truthy *force* overrides the counters.

        NOTE(review): force=0 (used by the recovery path to reset) is falsy
        and therefore ignored — confirm whether `force is not None` was meant.
        """
        previous_occurrences = self.get_occurrences()
        if self.thresholds.get_breached() :
            new_occurrences = previous_occurrences+1
            self.alert_config.set_for_tags(self.get_tags(), new_occurrences)
            self.occurrences_breached = self.alert_config.occurrences() <= new_occurrences
        if force :
            self.alert_config.set_for_tags(self.get_tags(), force)
            self.alert_config.set_for_tags(self.get_tags()+"_count", force)
    def send_metrics(self, *args, **kwargs) :
        # Placeholder metrics hook — currently just logs that it is
        # unimplemented.
        print("send_metrics not impl")
    def set_new_level_breached(self) :
        """Compare the current level with the previously stored one for this
        tag key, record whether it changed, and persist the new level."""
        key = self.get_tags()
        level = self.level()
        previous_level = self.alert_config.get_level(key)
        self.new_level_breached = level != previous_level
        self.alert_config.set_level(key, level)
        self.info("testInfo: {} {}".format(
            "NEW" if self.new_level_breached else "EXISTING",
            self.level(),
        ))
    def get_new_level_breached(self) :
        """True when the level changed on the most recent evaluation."""
        return self.new_level_breached
    def send_threshold_metrics(self) :
        """Emit the current value and each configured threshold as metrics.

        NOTE(review): `level` iterates WARNING/CRITICAL but is compared to
        UPPER, and `end` iterates UPPER/LOWER but is compared to WARNING —
        the loop variables appear swapped in both the get_threshold call and
        the key construction.  self.value and self.send_stat are also
        undefined in this class.
        """
        # TODO
        self.send_metrics(self.alert_config.id, self.value)
        for level in [Thresholds.WARNING, Thresholds.CRITICAL] :
            for end in [Thresholds.UPPER, Thresholds.LOWER] :
                v, ok = self.alert_config.get_threshold(isUpper=level == Thresholds.UPPER, isWarning=end == Thresholds.WARNING)
                if ok :
                    key = "{}_{}_threshold".format(
                        "upper" if level == Thresholds.UPPER else "lower",
                        "warning" if level == Thresholds.WARNING else "critical",
                    )
                    self.send_stat(key, v, {'id':self.name()})

View File

@@ -0,0 +1,13 @@
from alert import Alert
class Alert_Factory():
    """Builds Alert instances for a single alert config, carrying the
    logger and its convenience log methods."""

    def __init__(self, alert_config, logger):
        self.alert_config = alert_config
        self.logger = logger
        self.info = logger.info
        self.warning = logger.warning
        self.debug = logger.debug
        self.error = logger.error

    def build(self, minvalue, maxvalue, result, tags, availability, alert_tags):
        """Construct an Alert for one query result.

        BUG FIX: Alert.__init__ takes (alert_config, logger, tags, result,
        min_value, max_value); the original call omitted the logger, which
        shifted every later argument one position left.

        NOTE(review): `availability` and `alert_tags` are accepted but not
        forwarded — Alert.__init__ has no parameters for them; confirm
        whether Alert's signature should grow to accept these.
        """
        return Alert(self.alert_config, self.logger, tags, result, minvalue, maxvalue)

View File

@@ -0,0 +1,83 @@
from datetime import datetime, timedelta
from urllib.parse import urljoin
import requests
class PromAPI:
    """Minimal client for the Prometheus HTTP API (query, query_range,
    series)."""

    def __init__(self, endpoint='http://127.0.0.1:9090/'):
        """
        :param endpoint: base address of the Prometheus server
        """
        self.endpoint = endpoint

    @staticmethod
    def _to_timestamp(input_):
        """
        Convert *input_* to an ISO-8601 timestamp string for Prometheus.

        Accepts a datetime, the string 'now', a numeric string, or a number:
        positive numbers are treated as UNIX timestamps, 0 means "now", and
        negative numbers are offsets in seconds before now.

        BUG FIX: the original returned a raw float for datetimes and
        positive numbers, which crashed every call site that appends 'Z'
        (str + float TypeError).  All branches now return an ISO string.
        """
        if type(input_) == datetime:
            return input_.isoformat('T')
        if input_ == 'now':
            return datetime.utcnow().isoformat('T')
        if type(input_) is str:
            input_ = float(input_)
        if type(input_) in [int, float]:
            if input_ > 0:
                return datetime.utcfromtimestamp(input_).isoformat('T')
            if input_ == 0:  # return now
                return datetime.utcnow().isoformat('T')
            if input_ < 0:
                return (datetime.utcnow() + timedelta(seconds=input_)).isoformat('T')

    def query(self, query='prometheus_build_info'):
        """Run an instant query via /api/v1/query."""
        return self._get(
            uri='/api/v1/query',
            params=dict(
                query=query
            )
        )

    def query_range(self, query='prometheus_build_info', start=-60, end='now', duration=60):
        """Run a range query via /api/v1/query_range (step = *duration* seconds)."""
        params = {
            'query': query
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        if duration:
            params['step'] = duration
        print(params)
        return self._get(
            uri='/api/v1/query_range',
            params=params
        )

    def series(self, match='prometheus_build_info', start=-86400, end='now'):
        """List series matching *match* via /api/v1/series."""
        params = {
            'match[]': match
        }
        if end is not None:
            params['end'] = self._to_timestamp(end) + 'Z'
        if start:
            params['start'] = self._to_timestamp(start) + 'Z'
        print(params)
        return self._get(
            uri='/api/v1/series',
            params=params
        )

    def _get(self, uri, params, method='GET'):
        """GET *uri* (joined onto the endpoint) and return the decoded JSON."""
        url = urljoin(self.endpoint, uri)
        assert method == 'GET'
        result = requests.get(
            url=url,
            params=params
        )
        return result.json()

View File

@@ -0,0 +1,949 @@
""" Alert On Metrics functions"""
import copy
import itertools
import json
import os
import random
import smtplib
from email.mime.text import MIMEText
from socket import gaierror
from time import sleep
from hashlib import md5
import requests
from statsd import StatsClient
from serviceapp.prom_api import PromAPI
# Display names presumably indexed by a numeric severity code (0 recovery,
# 1-2 warning, 3-5 critical) — TODO confirm against the callers that index
# into this list.
alert_status = [
    'RECOVERY',
    'WARNING',
    'WARNING',
    'CRITICAL',
    'CRITICAL',
    'CRITICAL']
def build_alert_message(alert, minvalue, maxvalue, result, logger,
                        availability, tag=None, alert_tags=None):
    """
    Build the alert message
    Args:
        alert: the alert object that includes a tag definition
        minvalue: the min value to test against the threshold
        maxvalue: the max value to test against the threshold
        result: the response back from kairosdb
        logger (log object): does the logging
        availability: Send availability stat 1
        tag: If passed in will use this value for the tag instead of
            getting it from the result object
        alert_tags: the tags corresponding to the result, used if an
            alert has to be triggered and a custom routing per tag is configured
    Returns:
        Alert message string, or None when no alert needs to fire
    """
    # NOTE(review): the whole threshold/occurrence implementation was
    # commented out wholesale here, mid-refactor into the Alert /
    # Alert_Config / Threshold classes (see the accompanying unit tests).
    # The function therefore performed no work and always fell through
    # returning None; the dead commented-out code has been removed (it
    # remains available in version control) and the current no-op
    # behaviour is made explicit below. Callers filter out None entries,
    # so no alerts fire through this path until the refactor lands.
    return None
def check_kairosdb_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Poll KairosDB for one alert config in an endless loop and dispatch alerts.

    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, alerts are only logged instead of
            being sent through the notification channels
    Returns:
        None (never returns; re-runs every alert_config['interval'] seconds)
    """
    availability = False
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due Grafana limitations
    if 'availability' in alert_config and alert_config['availability']:
        availability = True
    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            # BUILD URL FOR KAIROSDB METRICS AND QUERY FOR RESULTS
            # NOTE(review): os.path.join is called with one already
            # concatenated string, so it is a no-op here; plain concatenation
            # (or urljoin) would state the intent more clearly.
            query_url = os.path.join(
                service_config['kairosdb_url'] +
                "api/v1/datapoints/query")
            ret = requests.post(
                query_url,
                data=json.dumps(
                    alert_config['query']),
                timeout=service_config['timeout'])
            assert ret.status_code == 200
            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret.json()['queries'][0]['results']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []
            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                # Custom per-tag routing only applies when the alert config
                # declares a 'lookup' table under 'alerts'.
                alert_tags = (get_alert_tags(alert_config, r)
                              if has_custom_alert_routing(alert_config) else None)
                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    minvalue = min([x[1] for x in r['values']])
                    maxvalue = max([x[1] for x in r['values']])
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            minvalue,
                            maxvalue,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))
                # THIS MEANS OUR KAIROS QUERY RETURNED NOTHING. COULD BE NETWORK
                # ISSUES. WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF KAIROS NOT RETURNING DATA WE WILL CLEAR
                # AOM'S BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A
                # LATER OCCURRENCE CAUSING A PREMATURE ALERT.
                # A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        # Skip the bookkeeping keys ('<tag>_count',
                        # '<tag>_noresult') and tags with no active alarm.
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                    "for {}, clear out counts for tag '{}'".format(
                                    alert_config['alert_tags'][key_noresult],
                                    alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                    "for {}, tag '{}'".format(
                                    alert_config['alert_tags'][key_noresult],
                                    alert_config['id'], key))
            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    # Deep-copy so the handlers' mutations (e.g. popping
                    # channels) do not leak into our persistent config.
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert for: {}".format(
                            alert_config.get('id')))
        # HANDLE THE UNEXPECTED
        except TimeoutError:
            logger.error("Query [{}] took to long to run".format(
                alert_config['id']))
        except AssertionError:
            logger.error(
                "KairsoDB query failed: {}\n"
                "HTTP status code:\t{}\n"
                "Error Message:\t{}\nQuery:\n"
                "{}".format(
                    ret.url,
                    ret.status_code,
                    ret.text,
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            # Always pace the loop, even after errors.
            sleep(alert_config['interval'])
def check_prometheus_alert(
        alert_config,
        service_config,
        logger,
        production_mode=True):
    """
    Poll Prometheus for one alert config in an endless loop and dispatch alerts.

    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, alerts are only logged instead of
            being sent through the notification channels
    Returns:
        None (never returns; re-runs every alert_config['interval'] seconds)
    """
    # SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
    # START AT THE SAME TIME
    wait_time = random.randint(0, alert_config['interval'])
    logger.info(
        "ALERT_CONFIG: {}\tsleep: {}".format(
            alert_config['id'],
            wait_time))
    sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due to Grafana limitations
    availability = bool(alert_config.get('availability'))
    # ====================
    # EACH CHECK JUST LOOPS
    # ====================
    ret = None
    while True:
        try:
            send_stat("check_run", 1, {'id': alert_config['id']})
            prom_api = PromAPI(endpoint=alert_config['prometheus_url'])
            ret = prom_api.query_range(
                query=alert_config['query'],
                start=alert_config['start_time'],
                end=alert_config['end_time'],
                duration=alert_config['interval'])
            assert ret['status'] == 'success'
            # GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
            results = ret['data']['result']
            logger.debug(
                "Got back {} results for alert {}".format(
                    len(results), alert_config['id']))
            log_alert_results(results, alert_config, logger)
            alert_list = []
            # LOOP THROUGH ALL THE RESULTS
            for r in results:
                # Custom per-tag routing only applies when the alert config
                # declares a 'lookup' table under 'alerts'.
                alert_tags = (get_alert_tags(alert_config, r) if
                              has_custom_alert_routing(alert_config) else None)
                # REARRANGE RESULT TO MORE CLOSELY MATCH KAIROSDB RESULT
                r['tags'] = {key: [value]
                             for (key, value) in r['metric'].items()}
                # OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
                # THEREIN AND EXAMINE FOR FAILURE
                if r['values']:
                    # Prometheus returns sample values as strings, hence the
                    # float() conversion before comparison.
                    raw_values = [value for _, value in r['values']]
                    min_value = float(min(raw_values))
                    max_value = float(max(raw_values))
                    # SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
                    # AN OBJECT
                    # NOTE(review): min/max are taken over the raw strings and
                    # converted afterwards, so ordering is lexicographic, not
                    # numeric (e.g. "9" > "10") — confirm intended.
                    alert_list.append(
                        build_alert_message(
                            alert_config,
                            min_value,
                            max_value,
                            r,
                            logger,
                            availability,
                            alert_tags=alert_tags))
                # THIS MEANS OUR QUERY RETURNED NOTHING. COULD BE NETWORK ISSUES
                # WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
                # AFTER X OCCURRENCES OF NOT RETURNING DATA WE WILL CLEAR AOM'S
                # BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A LATER
                # OCCURRENCE CAUSING A PREMATURE ALERT. A NO-OP IF NO HISTORY.
                elif 'alert_tags' in alert_config:
                    for key in alert_config['alert_tags']:
                        # Skip the bookkeeping keys ('<tag>_count',
                        # '<tag>_noresult') and tags with no active alarm.
                        if ('count' not in key and 'noresult' not in key and
                                alert_config['alert_tags'][key] > 0):
                            key_noresult = key + "_noresult"
                            key_count = key + "_count"
                            if alert_config['alert_tags'][key_noresult] > 10:
                                logger.info("{} occurrences of no results back "
                                    "for {}, clear out counts for tag '{}'".format(
                                    alert_config['alert_tags'][key_noresult],
                                    alert_config['id'], key))
                                alert_list.append(
                                    build_alert_message(
                                        alert_config,
                                        0,
                                        0,
                                        None,
                                        logger,
                                        availability,
                                        key,
                                        alert_tags=alert_tags))
                                alert_config['alert_tags'][key] = 0
                                alert_config['alert_tags'][key_count] = 0
                                alert_config['alert_tags'][key_noresult] = 0
                            else:
                                alert_config['alert_tags'][key_noresult] += 1
                                logger.info("{} occurrences of no results back "
                                    "for {}, tag '{}'".format(
                                    alert_config['alert_tags'][key_noresult],
                                    alert_config['id'], key))
            # SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
            for alert in [x for x in alert_list if x is not None]:
                if production_mode:
                    # Deep-copy so the handlers' mutations (e.g. popping
                    # channels) do not leak into our persistent config.
                    send_alerts(
                        alert,
                        copy.deepcopy(alert_config),
                        service_config['victorops_url'],
                        service_config['slack_url'],
                        service_config['slack_token'],
                        service_config['smtp_server'],
                        service_config['sensu_endpoint'],
                        service_config['uchiwa_url'],
                        logger)
                else:
                    logger.info(
                        "Sending alert {}".format(
                            alert_config.get('id')))
        # HANDLE THE UNEXPECTED
        except TimeoutError:
            logger.error(
                "Query [{}] took to long to run".format(
                    alert_config['id']))
        except AssertionError:
            logger.error(
                "Prometheus query failed:\n"
                "Status:\t{}\n"
                "Error Type:\t{}\n"
                "Error Message:\t{}\n"
                "Query:\n{}".format(
                    ret['status'],
                    ret['errorType'],
                    ret['error'],
                    alert_config['query']))
        except gaierror:
            logger.error(
                "Unable to connect to smtp server: {}".format(
                    service_config['smtp_server']))
        except Exception as e:
            logger.error(
                "Unhandled exception {} on alert: {}".format(
                    str(e), alert_config['id']))
        finally:
            # Always pace the loop, even after errors.
            sleep(alert_config['interval'])
# LOG ALERT RESULTS SO WE CAN DEBUG IF NEEDED
def log_alert_results(results, alert_config, logger):
    """
    Emit one debug log line per query result, prefixed with the alert id.

    Args:
        results: the results object returned from the call to kairosdb, of just
            the results
        alert_config: config object of the alert (only 'id' is read)
        logger (log object): does the logging
    Returns:
        None, logs to logger
    """
    alert_id = alert_config['id']
    for entry in results:
        logger.debug("{} - Result: {}".format(alert_id, entry))
def send_alerts(
        alert,
        alert_config,
        victorops_url,
        slack_url,
        slack_token,
        smtp_server,
        sensu_endpoint,
        uchiwa_url,
        logger):
    """
    Sends out the alerts to VO, Email, Sensu and/or Slack
    Args:
        alert: the alert tuple:
            alert[0] == subject, alert[1] == body, alert[2] == level (index
            into alert_status), alert[3] == alert_tags, alert[4] == md5sum
        alert_config: the alert configuration object (may be mutated here;
            callers pass a deep copy)
        victorops_url: url to victorops
        slack_url: url to slack api calls
        slack_token: the token for the alert
        smtp_server: The server to send mail messages too
        sensu_endpoint: URL that Sensu check results are POSTed to
        uchiwa_url: base URL of the Uchiwa dashboard linked from Sensu output
        logger (log object): does the logging
    Returns: None
    """
    # GOING TO USE THIS FOR TAGGING SOME METRICS ABOUT WHAT ALERT CHANNEL WAS
    # USED
    tag_dict = dict()
    tag_dict['alert'] = alert_config['id']
    is_custom_alert_routing = has_custom_alert_routing(alert_config)
    if is_custom_alert_routing:
        # Replace the channel config with the per-tag routing entry, falling
        # back to the lookup table's 'default' entry.
        alert_routing = alert_config.get('alert_routing_lookup', {})
        alert_config['alerts'] = alert_routing.get(
            alert[3], alert_config['alerts']['lookup']['default'])
    # once we move all alerts into sensu, we dont need to tho this
    if 'filters' in alert_config:
        logger.info(
            "alert_status : {}, alert_config: {}".format(
                alert[2], alert_config))
        # Levels 1 and 2 are warnings; subdue filters suppress those on the
        # corresponding channel so only criticals/recoveries get through.
        if 'slack_subdue' in alert_config['filters'] and alert[2] in (
                1, 2) and alert_config['filters']['slack_subdue']:
            # unless the alert is critical we dont send it
            logger.info("Removed slack, alert_config: {}".format(alert_config))
            alert_config['alerts'].pop('slack', None)
        if ('victorops_subdue' in alert_config['filters'] and
                alert[2] in (1, 2) and
                alert_config['filters']['victorops_subdue']):
            # unless the alert is critical we dont send it
            alert_config['alerts'].pop('vo', None)
            logger.info("Removed vo, alert_config: {}".format(alert_config))
    # ====================
    # VICTOROPS HANDLING
    # ====================
    if 'vo' in alert_config['alerts']:
        for notify in alert_config['alerts']['vo']:
            payload = dict(entity_id=alert[0],
                           message_type=alert_status[alert[2]],
                           state_message=alert[1])
            r = None
            # NOTE(review): "application-json" below looks like a typo for
            # "application/json" — confirm VictorOps tolerates it before
            # changing.
            try:
                r = requests.post(
                    victorops_url + notify,
                    data=json.dumps(payload),
                    headers={
                        "Content-type": "application-json"})
                assert r.status_code == 200
                # Record a VO alert sent event
                tag_dict['alert_channel_type'] = "VictorOps"
                tag_dict['who'] = "vo:{}".format(notify)
                send_stat("alert_channel", 1, tag_dict)
                # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
            except AssertionError:
                logger.error(
                    "Post to VO failed for {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                logger.error("Unhandled exception for alert_id:{} "
                             "when posting to VO: {}".format(
                                 alert_config['id'], str(e)))
    # ====================
    # EMAIL HANDLING
    # ====================
    # Only state *transitions* (recovery=0, new warning=1, new critical=3)
    # are emailed; repeats (2, 4+) are not.
    if 'email' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        msg = MIMEText(alert[1])
        msg['Subject'] = '{} Status: {}'.format(
            alert[0], alert_status[alert[2]])
        msg['From'] = 'aom@qualtrics.com'
        msg['To'] = ','.join(
            [x + "@qualtrics.com" for x in alert_config['alerts']['email']])
        try:
            s = smtplib.SMTP(smtp_server)
            s.send_message(msg)
            s.quit()
            # Record an Email alert sent event
            tag_dict['alert_channel_type'] = "Email"
            tag_dict['who'] = "email:{}".format(msg['To'])
            send_stat("alert_channel", 1, tag_dict)
            # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
        except Exception as e:
            logger.error(
                "Unhandled exception when sending mail for {} to {}\n{}".format(
                    alert_config['id'], smtp_server, str(e)))
    # ====================
    # SENSU HANDLING
    # ====================
    if 'sensu' in alert_config['alerts']:
        # Dictionary with static values for Sensu
        sensu_dict = {
            'source': 'AOM',
            'refresh': 3600,
            'occurrences': 1,
            'name': alert_config['id']+'__'+alert[4]}
        # if alert[3]:
        #     logger.info(alert)
        #     sensu_dict['name'] = '_'.join(
        #         [alert_config['id']] + sorted(list(alert[3])))
        if 'refresh' in alert_config:
            sensu_dict['refresh'] = alert_config['refresh']
        sensu_dict['interval'] = alert_config['interval']
        sensu_dict['handlers'] = []
        sensu_dict['dashboard'] = alert_config['url']
        if 'dependencies' in alert_config['alerts']['sensu'].keys():
            sensu_dict['dependencies'] = (alert_config['alerts']
                                          ['sensu']['dependencies'])
        if 'victorops' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("victorops")
            sensu_dict['routing_key'] = (alert_config['alerts']
                                         ['sensu']['victorops'])
        # # Leave this here until we have email support in Sensu
        # if 'email' in alert_config['alerts']['sensu'].keys():
        #     sensu_dict['handlers'].append("email")
        #     # verify this option
        #     sensu_dict['email'] = alert_config['alerts']['sensu']['email']
        if 'slack' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("slack")
            sensu_dict['slack_channel'] = (
                alert_config['alerts']['sensu']['slack'])
            # Format alert message
            sensu_dict['dashboard'] = (
                "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                    alert_config['url'], uchiwa_url, alert_config['id']))
        if 'jira' in alert_config['alerts']['sensu'].keys():
            sensu_dict['handlers'].append("jira")
            sensu_dict.update(alert_config['alerts']['sensu']['jira'])
        if 'filters' in alert_config:
            sensu_dict['filters'] = alert_config['filters']
        # 0 = OK, 1 = WARNING, 2 = CRITICAL
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict['status'] = sensu_status[alert[2]]
        sensu_dict['output'] = alert[1]
        r = None
        try:
            user = os.environ['API_USER']
            passwd = os.environ['API_PASS']
            r = requests.post(
                sensu_endpoint,
                json.dumps(sensu_dict),
                auth=(
                    user,
                    passwd))
            assert r.status_code == 202
        except AssertionError:
            logger.error(
                "Post to Sensu failed {}\n{}:\t{}".format(
                    alert_config['id'],
                    r.status_code,
                    r.text))
        except Exception as e:
            logger.error("Unhandled exception for alert_id:{} "
                         "when posting to Sensu: {}".format(
                             alert_config['id'], str(e)))
    # ====================
    # SLACK HANDLING - all Slack alerts will go through Sensu
    # ====================
    # As with email, only state transitions (0, 1, 3) are forwarded.
    if 'slack' in alert_config['alerts'] and (
            alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
        refresh = alert_config.get('refresh', 3600)
        dashboard = alert_config.get('url', '')
        sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
        sensu_dict2 = {'handlers': ['slack'],
                       'interval': alert_config['interval'],
                       'source': 'AOM',
                       'refresh': refresh,
                       'occurrences': 1,
                       'name': alert_config['id']+'__'+alert[4],
                       'dashboard': dashboard,
                       'status': sensu_status[alert[2]],
                       'output': alert[1]}
        if is_custom_alert_routing:
            sensu_dict2['name'] = '_'.join(
                [alert_config['id']] + list(alert[3]))
            sensu_dict2['dashboard'] = (
                "<{}|here> , Uchiwa: <{}?check={}|here> ".format(
                    alert_config['url'], uchiwa_url, alert_config['id']))
        for channel in alert_config['alerts']['slack']:
            sensu_dict2['slack_channel'] = channel
            r = None
            try:
                user = os.environ['API_USER']
                passwd = os.environ['API_PASS']
                r = requests.post(
                    sensu_endpoint,
                    json.dumps(sensu_dict2),
                    auth=(
                        user,
                        passwd))
                assert r.status_code == 202
            except AssertionError:
                logger.error(
                    "Post to Sensu failed {}\n{}:\t{}".format(
                        alert_config['id'], r.status_code, r.text))
            except Exception as e:
                logger.error("Unhandled exception for alert_id:{} when posting"
                             "to Sensu: {}".format(alert_config['id'], str(e)))
            # payload = dict(token=slack_token, channel=channel,
            # text="{} Status: {}".format(alert[1], alert_status[alert[2]]))
            # r = None
            # try:
            #     r = requests.post(slack_url, data=payload)
            #     assert r.status_code == 200
            #     # Record an Slack alert sent event
            #     tag_dict['alert_channel_type'] = "Slack"
            #     tag_dict['who'] = "slack:{}".format(channel)
            #     send_stat("alert_channel", 1, tag_dict)
            #     # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
            # except AssertionError:
            #     logger.error("Post to Slack failed for {}\n{}:\t{}".format(alert_config['id'], r.status_code, r.text))
            # except Exception as e:
            #     logger.error("Unhandled exception for alert_id:{} when posting to Slack: {}".format(alert_config['id'],
            #                                                                                         str(e)))
def send_metrics(alert, value, result, gaugename='stats'):
    """
    Sends the results from the alert check to statsd
    Args:
        alert: The Alert config object that holds the alert['tag'] value.
        gaugename: The name of the gauge metric we send.
        value: The value we want to send as a gauge.
        result: The result object from making the call. Use the data in this
            object to tag the metric.
    Returns: None
    """
    # GROUP ALL THE ALERTS TOGETHER SO THAT PEEPS CAN FILTER OUT BY TAG THEIR
    # SPECIFIC ALERTS
    tag_names = alert['tags']
    # Flatten the per-tag value lists from the result in configured order,
    # then pair them up positionally with the tag names.
    flat_values = list(itertools.chain.from_iterable(
        result['tags'][name] for name in tag_names))
    tag_dict = {name: flat_values[pos] for pos, name in enumerate(tag_names)}
    tag_dict['alert'] = alert['id']
    # SEND THE METRIC
    send_stat(gaugename, value, tag_dict)
def send_stat(gaugename, value, tag_dict, statprefix='aom'):
    """Sends stats value to statsd"""
    # A short-lived client per call, pointed at the local telegraf agent.
    statsd_client = StatsClient('telegraf', 8125, statprefix)
    # SUBMIT STATS
    statsd_client.gauge(gaugename, value, tags=tag_dict)
def has_custom_alert_routing(alert_config):
    """Checks if alert has custom routing (a 'lookup' table under 'alerts')"""
    alerts_section = alert_config['alerts']
    return 'lookup' in alerts_section
def get_alert_tags(alert_config, query_result):
    """Retrieves custom routing tag values from a query result.

    Supports prometheus-style results ('metric' dict of scalars) and
    kairosdb-style results ('tags' dict of non-empty lists); tags missing
    from the result are skipped. Returns a tuple of values.
    """
    is_prometheus = alert_config.get('query_type') == 'prometheus'
    collected = []
    for name in alert_config['alerts']['lookup']['tags']:
        if (is_prometheus and 'metric' in query_result
                and name in query_result['metric']):
            collected.append(query_result['metric'][name])
            continue
        if ('tags' in query_result and name in query_result['tags']
                and query_result['tags'][name]):
            collected.append(query_result['tags'][name][0])
    return tuple(collected)

View File

@@ -0,0 +1,123 @@
import unittest
class Mock_Alert_Config():
    """Stand-in for Alert_Config used by the Alert unit tests below."""

    def __init__(self):
        self.cache = {}
        self.level = {}
        self.id = "id"

    def set_level(self, key, value):
        self.level[key] = value

    def get_level(self, key):
        # Missing keys read as None without being inserted.
        return self.level.get(key)

    def init_for_tags(self, *args):
        pass

    def occurrences(self):
        return 1

    def get_threshold(self, upper, warning):
        # Warning thresholds are unset; criticals are upper=10 / lower=0.
        if warning:
            return None, False
        return (10, True) if upper else (0, True)

    def get_tags(self):
        return ["tagsC", "tagsD"]

    def set_for_tags(self, key, value):
        self.cache[key] = value

    def get_for_tags(self, key):
        # Default missing keys to 0, inserting them as the real config does.
        return self.cache.setdefault(key, 0)
class Mock_Result():
    """Result double: indexing with "tags" yields the object itself, any other key is echoed back."""

    def __getitem__(self, key):
        return self if key == "tags" else key
class Mock_Logger():
    """Logger double whose level methods are all silent no-ops."""

    def log(self, *args):
        pass

    # Every level method shares the same no-op implementation.
    error = warn = warning = info = debug = log
class Test_Alert(unittest.TestCase) :
    """Tests for alert.Alert, driven by the mock config/result/logger above.

    The mock config reports a critical upper threshold of 10 and a critical
    lower threshold of 0 (warnings unset); the min/max constructor arguments
    below are chosen around those values.
    """
    def test_set_tags(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # With no tags and no result the alert falls back to "instance".
        al = alert.Alert(ac, Mock_Logger(), None, None, -1, 11)
        self.assertEqual(al.get_tags(), "instance")
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertEqual(al.get_tags(), "tagsA, tagsB")
        al.set_tags("a, b, c", res)
        self.assertEqual(al.get_tags(), "a, b, c")
        # Re-applying the same tags must leave them unchanged.
        al.set_tags("a, b, c", res)
        self.assertEqual(al.get_tags(), "a, b, c")
    def test_firing(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # min/max pairs straddle the mock thresholds (lower 0, upper 10):
        # a breach on either end should report firing.
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 11)
        self.assertTrue(al.get_firing())
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 9)
        self.assertTrue(al.get_firing())
        # 1..9 sits strictly inside both thresholds: no firing.
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 9)
        self.assertFalse(al.get_firing())
    def test_str(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # NOTE(review): rebinding the name `alert` to the instance shadows
        # the module for the rest of this method (harmless here).
        alert = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
        self.assertEqual(alert.name(), "Metric: id for tagsA, tagsB")
        self.assertEqual(alert.body(), "")
    def test_occurrences(self) :
        import alert
        ac = Mock_Alert_Config()
        res = Mock_Result()
        # max == 10 does not exceed the upper threshold of 10, so the
        # occurrences flag stays False and the per-tag counter stays 0.
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
        self.assertEqual(False, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        self.assertEqual(False, al.occurrences_breached)
        self.assertEqual(0, ac.get_for_tags(al.get_tags()))
        # max == 11 breaches; after construction plus three
        # set_occurrences() calls the per-tag counter reads 4 -- presumably
        # one increment per evaluation (TODO confirm against alert.Alert).
        al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 11)
        self.assertEqual(True, al.occurrences_breached)
        al.set_occurrences()
        al.set_occurrences()
        al.set_occurrences()
        self.assertEqual(True, al.occurrences_breached)
        self.assertEqual(4, ac.get_for_tags(al.get_tags()))
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,33 @@
import unittest
import alert_factory
class Mock_Alert():
    """Alert double that simply records its constructor arguments."""
    def __init__(self, *args):
        self.args = args
class Mock_Logger() :
def __init__(self) :
self.info = self.log
self.warn = self.log
self.warning = self.log
self.error = self.log
self.debug = self.log
def log(self, *args, **kwargs) :
print(args, kwargs)
class Test_Alert_Factory(unittest.TestCase):
    """Alert_Factory.build must construct the (patched) Alert class."""
    def setUp(self):
        # Swap the real Alert class for the recording mock.
        self.was = alert_factory.Alert
        alert_factory.Alert = Mock_Alert
    def tearDown(self):
        # Restore the real Alert class.
        alert_factory.Alert = self.was
    def test(self):
        factory = alert_factory.Alert_Factory(None, Mock_Logger())
        built = factory.build(0, 5, None, "tagA, tagB", False, "tagC, tagD")
        self.assertIs(type(built), Mock_Alert)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,8 @@
import unittest
class Test_Service(unittest.TestCase) :
    """Placeholder suite for the service module."""
    def test(self) :
        # TODO: implement; fails deliberately so the coverage gap is visible.
        raise Exception("not impl")
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold(unittest.TestCase):
    """The Threshold base class represents an unset limit: it never breaches."""
    def test(self):
        import threshold
        base = threshold.Threshold(5)
        self.assertFalse(base.can_breach())
        # Neither side of the limit counts as exceeding for the base class.
        for value in (7, 3):
            self.assertFalse(base.exceeds(value))
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Lower(unittest.TestCase):
    """Threshold_Lower(5) breaches, and values below 5 exceed it."""
    def test(self):
        import threshold_lower
        tl = threshold_lower.Threshold_Lower(5)
        # Bug fix: the original asserted on the *bound method* object
        # (`tl.can_breach`), which is always truthy; call it instead.
        self.assertTrue(tl.can_breach())
        self.assertTrue(tl.exceeds(3))
        self.assertFalse(tl.exceeds(7))
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Upper(unittest.TestCase):
    """Threshold_Upper(5) breaches, and values above 5 exceed it."""
    def test(self):
        import threshold_upper
        tl = threshold_upper.Threshold_Upper(5)
        # Bug fix: the original asserted on the *bound method* object
        # (`tl.can_breach`), which is always truthy; call it instead.
        self.assertTrue(tl.can_breach())
        self.assertTrue(tl.exceeds(7))
        self.assertFalse(tl.exceeds(3))
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,157 @@
import unittest
class Mock_Alert_Config():
    """Config double: warning thresholds unset, critical upper/lower fixed."""
    def __init__(self):
        self.upCrit = 10
        self.lowCrit = 1
    def get_threshold(self, upper, warn):
        """Return (value, is_set) for the requested threshold."""
        if warn:
            return None, False
        value = self.upCrit if upper else self.lowCrit
        return value, True
class Test_Thresholds(unittest.TestCase) :
    """Tests for thresholds.Thresholds driven by Mock_Alert_Config, which
    sets only the two critical thresholds (upper 10, lower 1); warning
    thresholds are unset and can therefore never breach."""
    def test_breached_both(self) :
        # min below the lower critical AND max above the upper critical:
        # every critical- and end-scoped query fires; warning queries do not.
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit-1, alert_config.upCrit+1)
        should_fire = [
            t.critical_breached(),
            t.lower_breached(),
            t.upper_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),
            t.end_breached(t.UPPER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)
        should_not_fire = [
            t.warning_breached(),
            t.level_breached(t.WARNING),
            t.get_breached(level=t.WARNING),
        ]
        for i in range(len(should_not_fire)) :
            self.assertFalse(should_not_fire[i], i)
    def test_breached_lower(self) :
        # Only the lower critical threshold is crossed.
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit-1, alert_config.upCrit)
        should_fire = [
            t.critical_breached(),
            t.lower_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.LOWER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.LOWER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)
        should_not_fire = [
            t.warning_breached(),
            t.upper_breached(),
            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_not_fire)) :
            self.assertFalse(should_not_fire[i], i)
    def test_breached_upper(self) :
        # Only the upper critical threshold is crossed.
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        t.set_breached(alert_config.lowCrit, alert_config.upCrit+1)
        should_fire = [
            t.critical_breached(),
            t.upper_breached(),
            t.level_breached(t.CRITICAL),
            t.end_breached(t.UPPER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(end=t.UPPER),
        ]
        for i in range(len(should_fire)) :
            self.assertTrue(should_fire[i], i)
        for i in [
            t.warning_breached(),
            t.lower_breached(),
            t.level_breached(t.WARNING),
            t.end_breached(t.LOWER),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.LOWER),
        ] :
            self.assertFalse(i)
    def test_breached_notset(self) :
        # Before set_breached() runs, nothing reports as breached.
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        for i in [
            t.warning_breached(),
            t.critical_breached(),
            t.upper_breached(),
            t.lower_breached(),
            t.level_breached(t.CRITICAL),
            t.level_breached(t.WARNING),
            t.end_breached(t.UPPER),
            t.end_breached(t.LOWER),
            t.get_breached(),
            t.get_breached(level=t.CRITICAL),
            t.get_breached(level=t.WARNING),
            t.get_breached(end=t.UPPER),
            t.get_breached(end=t.LOWER),
        ] :
            self.assertFalse(i)
    def test_get_matching(self) :
        import thresholds
        alert_config = Mock_Alert_Config()
        t = thresholds.Thresholds(alert_config)
        self.assertEqual(4, len([i for i in t.get_thresholds_matching()]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.CRITICAL)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.WARNING)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.UPPER)]))
        self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.LOWER)]))
        # Bug fix: the original passed level constants as end= and vice
        # versa (e.g. end=t.CRITICAL, level=t.LOWER); it only passed because
        # WARNING/UPPER and CRITICAL/LOWER share boolean values.
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.UPPER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.UPPER)]))
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,19 @@
class Threshold():
    """Inert base threshold.

    Holds a limit value but never considers it exceeded; Threshold_Upper /
    Threshold_Lower override exceeds() and can_breach() to add a direction.
    """
    def __init__(self, threshold):
        self.threshold = threshold  # the configured limit (may be None)
        self.breached = False       # last result recorded by set_breached()
    def can_breach(self):
        """The base class represents an unset threshold: it cannot breach."""
        return False
    def exceeds(self, value):
        """No value exceeds an unset threshold."""
        return False
    def set_breached(self, value):
        """Record whether *value* exceeds the limit (always False here)."""
        self.breached = self.exceeds(value)
    def get_breached(self):
        """Return the result of the most recent set_breached() call."""
        return self.breached
    def get_threshold(self):
        """Return the configured limit value."""
        return self.threshold

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Lower(Threshold):
    """Breachable lower bound: values strictly below the limit exceed it."""
    def can_breach(self):
        return True
    def exceeds(self, value):
        return value < self.threshold

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Upper(Threshold):
    """Breachable upper bound: values strictly above the limit exceed it."""
    def can_breach(self):
        return True
    def exceeds(self, value):
        return value > self.threshold

View File

@@ -0,0 +1,67 @@
from threshold_upper import Threshold_Upper
from threshold_lower import Threshold_Lower
from threshold import Threshold
class Thresholds() :
    """Holds the four (level, end) threshold cells for one alert config.

    Levels and ends are boolean-encoded: WARNING/CRITICAL select the
    severity, UPPER/LOWER the direction.  A cell whose value is absent from
    the alert config is backed by the inert Threshold base class, which
    never breaches.
    """
    WARNING = True
    CRITICAL = False
    UPPER = True
    LOWER = False
    def __init__(self, alert_config) :
        self.alert_config = alert_config
        self.thresholds = {}
        for level in [ Thresholds.WARNING, Thresholds.CRITICAL ] :
            self.thresholds[level] = {}
            for end in [ Thresholds.UPPER, Thresholds.LOWER ] :
                constructor = Threshold_Upper
                if end == Thresholds.LOWER :
                    constructor = Threshold_Lower
                self.thresholds[level][end] = self.create_threshold(end, level, constructor)
    def create_threshold(self, isUpper, isWarning, constructor) :
        """Build one cell; fall back to the inert Threshold when the config
        reports no value for this (end, level) combination."""
        value, has = self.alert_config.get_threshold(isUpper, isWarning)
        if not has :
            constructor = Threshold
        return constructor(value)
    def warning_breached(self) :
        return self.level_breached(Thresholds.WARNING)
    def critical_breached(self) :
        return self.level_breached(Thresholds.CRITICAL)
    def upper_breached(self) :
        return self.end_breached(Thresholds.UPPER)
    def lower_breached(self) :
        return self.end_breached(Thresholds.LOWER)
    def level_breached(self, level) :
        return self.get_breached(level=level)
    def end_breached(self, end) :
        return self.get_breached(end=end)
    def can_breach(self) :
        """True when at least one cell holds a real (directional) threshold.

        Bug fix: the original called self.thresholds.get_thresholds_matching(),
        but self.thresholds is a plain dict -- the generator is a method of
        this class, so every call to can_breach() raised AttributeError.
        """
        can_breach = [t for t in self.get_thresholds_matching() if not type(t) is Threshold]
        return len(can_breach) > 0
    def get_breached(self, level=None, end=None) :
        """True if any cell matching the optional level/end filter breached."""
        for threshold in self.get_thresholds_matching(level=level, end=end) :
            if threshold.get_breached() :
                return True
        return False
    def set_breached(self, min_value, max_value) :
        """Evaluate the observed extremes: lower cells see min_value, upper
        cells see max_value."""
        for threshold in self.get_thresholds_matching(end=Thresholds.LOWER) :
            threshold.set_breached(min_value)
        for threshold in self.get_thresholds_matching(end=Thresholds.UPPER) :
            threshold.set_breached(max_value)
    def get_thresholds_matching(self, level=None, end=None) :
        """Yield cells matching the optional level/end filter (None = all)."""
        for l in self.thresholds :
            if level is None or l == level :
                for e in self.thresholds[l] :
                    if end is None or e == end :
                        yield self.thresholds[l][e]

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Alert_Config(unittest.TestCase):
    """Alert_Config must reject a missing config and expose its id."""
    def test(self):
        from alert_config import Alert_Config
        # A nil yaml_config cannot be subscripted and must raise.
        with self.assertRaises(Exception):
            Alert_Config(None)
        self.assertEqual("a", Alert_Config({"id": "a"}).id)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,54 @@
import unittest
import alert_config_list
class Mock_Alert_Config():
    """Minimal config double carrying only an id."""
    def __init__(self, id):
        self.id = id
class Test_Alert_Config_List(unittest.TestCase):
    """Tests for Alert_Config_List with Alert_Config patched to a plain mock."""
    def setUp(self) :
        # Patch the element class so the list stores lightweight mocks.
        self.was = alert_config_list.Alert_Config
        alert_config_list.Alert_Config = Mock_Alert_Config
        self.al = alert_config_list.Alert_Config_List()
    def tearDown(self) :
        alert_config_list.Alert_Config = self.was
        self.al = None
    def test_add(self) :
        # add() must de-duplicate by id and accept a single config, a plain
        # list of configs, or another Alert_Config_List.
        self.al.add(Mock_Alert_Config("a"))
        self.assertEqual(len(self.al), 1)
        self.al.add([Mock_Alert_Config("a")])
        self.assertEqual(len(self.al), 1)
        self.al.add([Mock_Alert_Config("b")])
        self.assertEqual(len(self.al), 2)
        self.al.add(Mock_Alert_Config("c"))
        self.assertEqual(len(self.al), 3)
        other = alert_config_list.Alert_Config_List()
        other.add(Mock_Alert_Config("d"))
        self.al.add(other)
        self.assertEqual(len(self.al), 4)
    def test_compare(self) :
        # compare(new) classifies ids: only-in-new -> added, only-in-self ->
        # removed, present in both -> modified.
        self.al.add(Mock_Alert_Config("a"))
        self.al.add(Mock_Alert_Config("b"))
        self.al.add(Mock_Alert_Config("c"))
        new = alert_config_list.Alert_Config_List()
        new.add(Mock_Alert_Config("a"))
        new.add(Mock_Alert_Config("y"))
        new.add(Mock_Alert_Config("z"))
        added, removed, modified = self.al.compare(new)
        if not "y" in added or not "z" in added :
            self.fail("added is missing elements")
        if not "b" in removed or not "c" in removed :
            self.fail("removed is missing elements")
        if not "a" in modified :
            self.fail("modified is missing elements")
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

34
AoM_Service/library/test_job.py Executable file
View File

@@ -0,0 +1,34 @@
import unittest
import job
class Mock_Subprocess():
    """Process double recording whether call()/join() were invoked."""
    # Class-level defaults; instances shadow them on first use.
    called = False
    joined = False
    pid = None
    def call(self, *args, **kwargs):
        self.called = True
    def join(self, *args, **kwargs):
        self.joined = True
class Test_Job(unittest.TestCase):
    """Job.kill must join its process and invoke the patched subprocess."""
    def setUp(self):
        # Replace the module-level subprocess with a recording double.
        self.was = job.subprocess
        self.subprocess = Mock_Subprocess()
        job.subprocess = self.subprocess
    def tearDown(self):
        job.subprocess = self.was
    def test(self):
        proc = Mock_Subprocess()
        j = job.Job("id", proc)
        j.kill()
        self.assertEqual(True, proc.joined)
        self.assertEqual(True, self.subprocess.called)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,50 @@
import unittest
import job_list
class Mock_Job():
    """Job double: keeps its id, ignores the process, kill() is a no-op."""
    def __init__(self, id, p):
        self.id = id
    def kill(self):
        return None
class Test_Job_List(unittest.TestCase):
    """Tests for job_list.Job_List with Job patched to a mock."""
    def setUp(self) :
        self.was = job_list.Job
        job_list.Job = Mock_Job
    def tearDown(self) :
        job_list.Job = self.was
    def test_add(self) :
        jl = job_list.Job_List()
        self.assertEqual(len(jl), 0)
        # None must be rejected rather than silently stored.
        try :
            jl.add(None)
            self.fail("can add nil to job_list")
        except Exception :
            pass
        # add() de-duplicates by job id and also accepts another Job_List.
        jl.add(Mock_Job("a", "a"))
        self.assertEqual(len(jl), 1)
        jl.add(Mock_Job("a", "a"))
        self.assertEqual(len(jl), 1)
        jl.add(Mock_Job("b", "b"))
        self.assertEqual(len(jl), 2)
        other = job_list.Job_List()
        other.add(Mock_Job("b", "b"))
        other.add(Mock_Job("c", "c"))
        jl.add(other)
        self.assertEqual(len(jl), 3)
        # kill(id) removes the matching job from the list.
        jl.kill("a")
        self.assertEqual(len(jl), 2)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,31 @@
import unittest
import process
class Mock_Multiprocessing():
    """multiprocessing.Process double recording its constructor arguments."""
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
    def get_target(self):
        """Mirror Process.get_target; the mock has no target."""
        return None
class Mock_Alert_Config():
    """Minimal config double carrying only an id."""
    def __init__(self, id):
        self.id = id
class Test_Process(unittest.TestCase):
    """process.Process must construct cleanly with multiprocessing patched."""
    def setUp(self):
        self.was = process.multiprocessing.Process
        process.multiprocessing.Process = Mock_Multiprocessing
    def tearDown(self):
        process.multiprocessing.Process = self.was
    def test(self):
        # Process is parameterised over get_target(); give it a trivial one.
        class MockProcess(process.Process):
            def get_target(self):
                return None
        MockProcess(Mock_Alert_Config("a"), {}, None, True)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,36 @@
import unittest
import process_factory
class Mock_Process_Prometheus():
    """Stand-in for Process_Prometheus; accepts and ignores any arguments."""
    def __init__(self, *args, **kwargs):
        pass
class Mock_Process_Kairos():
    """Stand-in for Process_Kairos; accepts and ignores any arguments."""
    def __init__(self, *args, **kwargs):
        pass
class Mock_Alert_Config():
    """Config double whose type() method echoes the constructor argument."""
    def __init__(self, type):
        self.t = type
    def type(self):
        return self.t
class Test_Process_Factory(unittest.TestCase):
    """build() dispatches on the config type: 'prometheus' vs anything else."""
    def setUp(self):
        self.was_prom = process_factory.process_prometheus.Process_Prometheus
        self.was_kai = process_factory.process_kairos.Process_Kairos
        process_factory.process_prometheus.Process_Prometheus = Mock_Process_Prometheus
        process_factory.process_kairos.Process_Kairos = Mock_Process_Kairos
    def tearDown(self):
        process_factory.process_prometheus.Process_Prometheus = self.was_prom
        process_factory.process_kairos.Process_Kairos = self.was_kai
    def test(self):
        factory = process_factory.Process_Factory(None, None, None)
        # Any non-prometheus type falls back to the kairos process.
        self.assertIs(type(factory.build(Mock_Alert_Config("a"))), Mock_Process_Kairos)
        self.assertIs(type(factory.build(Mock_Alert_Config("prometheus"))), Mock_Process_Prometheus)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,15 @@
import unittest
class Mock_Alert_Config():
    """Minimal config double carrying only an id."""
    def __init__(self, id):
        self.id = id
class Test_Process_Kairos(unittest.TestCase):
    """Process_Kairos must target service.check_kairosdb_alert."""
    def test(self):
        import process_kairos
        from serviceapp import service
        proc = process_kairos.Process_Kairos(Mock_Alert_Config("a"), None, None, None)
        self.assertIs(proc.get_target(), service.check_kairosdb_alert)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,15 @@
import unittest
class Mock_Alert_Config():
    """Minimal config double carrying only an id."""
    def __init__(self, id):
        self.id = id
class Test_Process_Prometheus(unittest.TestCase):
    """Process_Prometheus must target service.check_prometheus_alert."""
    def test(self):
        import process_prometheus
        from serviceapp import service
        proc = process_prometheus.Process_Prometheus(Mock_Alert_Config("a"), None, None, None)
        self.assertIs(proc.get_target(), service.check_prometheus_alert)
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

View File

@@ -0,0 +1,100 @@
import unittest
from serviceapp import service as serviceapp
import time
import config
import service
class Mock_ServiceApp_Service():
    """serviceapp double: records construction args; send_stat is a no-op."""
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
    def send_stat(self, *args, **kwargs):
        return None
class Mock_Logger() :
def __init__(self) :
self.lines = []
self.info = self.log
self.warn = self.log
self.warning = self.log
self.debug = self.log
self.error = self.log
def log(self, *args, **kwargs) :
self.lines.append("{}, {}".format(args, kwargs))
print(self.lines[-1])
def Mock_Sleep(t):
    """time.sleep replacement: returns immediately."""
    return None
def Mock_Get_Healthy(*args, **kwargs):
    """config.get_healthy_nodes_and_index replacement: index 0, one node."""
    return (0, 1)
def Mock_Distribute_Configs(*args, **kwargs):
    """config.distribute_configs replacement: always reports success."""
    return True
def Mock_Is_Valid(*args, **kwargs):
    """config.is_valid replacement: every config is considered valid."""
    return True
def ignore_warnings(test_func):
    """Decorator for test methods: run the wrapped method with all warnings
    suppressed.  The wrapper discards test_func's return value, which is
    fine for unittest methods."""
    import warnings
    def wrapper(self, *args, **kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            test_func(self, *args, **kwargs)
    return wrapper
class Test_Service(unittest.TestCase) :
    """Smoke test: Service.start runs one loop iteration with all external
    collaborators (serviceapp checks, config helpers, sleeping) patched out."""
    def setUp(self) :
        # Save originals, then patch module attributes with inert mocks.
        self.mock_serviceapp_service = Mock_ServiceApp_Service
        self.was_k = serviceapp.check_kairosdb_alert
        self.was_p = serviceapp.check_prometheus_alert
        self.was_service = service.service
        self.was_sleep = time.sleep
        self.was_get_healthy = config.get_healthy_nodes_and_index
        self.was_distribute = config.distribute_configs
        self.was_is_valid = config.is_valid
        serviceapp.check_kairosdb_alert = self.mock_serviceapp_service
        serviceapp.check_prometheus_alert = self.mock_serviceapp_service
        config.get_healthy_nodes_and_index = Mock_Get_Healthy
        config.distribute_configs = Mock_Distribute_Configs
        config.is_valid = Mock_Is_Valid
        serviceapp.sleep = Mock_Sleep
        service.sleep = Mock_Sleep
        time.sleep = Mock_Sleep
    def tearDown(self) :
        # NOTE(review): this rebinds the *name* `serviceapp` to
        # self.was_service instead of restoring service.service; the
        # attribute restores below then act on that object, so the imported
        # serviceapp module may be left patched.  Looks like a bug -- confirm.
        serviceapp = self.was_service
        serviceapp.check_kairosdb_alert = self.was_k
        serviceapp.check_prometheus_alert = self.was_p
        config.get_healthy_nodes_and_index = self.was_get_healthy
        config.distribute_configs = self.was_distribute
        config.is_valid = self.was_is_valid
        time.sleep = self.was_sleep
        serviceapp.sleep = self.was_sleep
        service.sleep = self.was_sleep
    @ignore_warnings
    def test(self) :
        import service
        logger = Mock_Logger()
        s = service.Service(logger, 100, "HOST", {
            "alert_folder": "./testdata",
            "alert_routing_config": {},
        })
        # Make is_running() return True exactly once so the service loop
        # executes a single iteration and then stops.
        global first
        first = True
        def f() :
            global first
            is_first = first
            first = False
            return is_first
        def purge_stale(*args) :
            return
        s.is_running = f
        s.purge_stale = purge_stale
        s.start()
# Allow running this test module directly.
if __name__ == "__main__" :
    unittest.main()

20
AoM_Service/library/testdata/engine.yaml vendored Executable file
View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_engine_failing
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 24
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_engine_failing) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

18
AoM_Service/library/testdata/fuel.yaml vendored Executable file
View File

@@ -0,0 +1,18 @@
---
id: sleeper_agents_milleniumfalcon_fuellevel_low
service: fuel
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_fuellevel_low) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1

20
AoM_Service/library/testdata/lightspeed.yaml vendored Executable file
View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_lightspeed_unavailable
service: captain
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 48
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_lightspeed_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['core']

20
AoM_Service/library/testdata/shields.yaml vendored Executable file
View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_shields_unavailable
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
suppressed_occurrences_threshold: 54
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_shields_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']