This commit is contained in:
bel
2021-09-12 22:16:11 -06:00
commit ceeb6f0385
129 changed files with 9221 additions and 0 deletions

23
Agents Executable file
View File

@@ -0,0 +1,23 @@
- Suppressor - New AoM field to declare dependencies' alerts - if any of the dependencies' alerts are firing, then do not check
x AoM is not AoM - it's just configurations
- Each AoM is a process dedicated to looping over 1 alert
- Kills & recreates processes on config change (?)
- Oh my god monolithic functions
- Insert suppress at comment "send all alerts found to the alert handlers..."
- No unittests
- Seems no multiplicity
- serviceapp/service.py
- Floyd-Warshall create fully connected graph on boot/MR as CSV
- Reporter - Slack bot to get graph/latest check by name
- Lookup AoM configs in Gitlab - fetch all on interval with PAT
- configs stored in docker image
- seem to be reloadable on MRs
- Execute query and return
- See nexpose for prometheus, kairos API
- matplotlib.pyplot
- last N values
- warning threshold
- critical threshold
- Visualizer - New AoM field to declare service name and dependent services' names - visible map of services as alerts firing and links between
- Hit uchiwa API for what's firing? How to handle silenced?
- Does AoM have an API for what's firing?

12
AoM_Service/.gitignore vendored Executable file
View File

@@ -0,0 +1,12 @@
# Created by .ignore support plugin (hsz.mobi)
### Vagrant template
.vagrant/
.idea/
build/results
logs/
*.pyc
.dockerignore
Dockerfile
build/builder
site-packages.tar.gz

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env groovy
pipeline {
agent {label 'nomad-builder'}
environment {
DOCKER_HOST = '127.0.0.1:2375'
WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
}
stages {
stage('Info') {
steps {
sh script: 'hostname'
echo "WORKSPACE_PATH: $WORKSPACE_PATH"
}
}
stage('Build') {
steps {
echo "No build required"
}
}
stage('Test') {
steps {
echo "Test done during merge request"
//sh script: 'cd build; ./test_changed.sh "${WORKSPACE_PATH}"'
}
}
stage('Deploy') {
steps {
script {
if ("$GIT_BRANCH" == "origin/master"){
echo "Running publish script"
sh script: './publish.sh'
echo "Triggering Rundeck job"
script {
step([$class: 'RundeckNotifier', includeRundeckLogs: true, jobId: 'c5323400-0d97-4488-8cf2-1d736a5f7fb9', nodeFilters: '', options: '', rundeckInstance: 'team-rundeck -- techops', shouldFailTheBuild: true, shouldWaitForRundeckJob: true, tags: '', tailLog: false])
}
}
else {
echo "No deploy step required."
}
}
}
}
}
post {
success {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test passed, update commit with green checkbox
}
// Notify Eng Viz of successful build
// slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
}
failure {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test failed, update commit status with red x
error("Build failed, check ${BUILD_URL} for details.")
}
// On failure send an email to Eng Vis
mail body: "Please check ${BUILD_URL} for details.",
subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
from: 'Jenkins',
to: 'eng-visibility@qualtrics.com'
// Finally send a warning message to Eng Vis slack channel
slackSend color: 'warning', message: "Failed Build: $BUILD_URL", channel: '#eng-invisibility'
}
}
}

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env groovy
pipeline {
agent {label 'nomad-builder'}
environment {
DOCKER_HOST = '127.0.0.1:2375'
WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
}
stages {
stage('Info') {
steps {
sh script: 'hostname'
echo "WORKSPACE_PATH: $WORKSPACE_PATH"
}
}
stage('Build') {
steps {
echo "Building AOM container"
sh script: 'docker build . -t aom_test_container'
}
}
stage('Test') {
steps {
echo "Launching container on test mode. It will take a few minutes."
sh script: 'docker run -e TEST=true -h $(hostname) --add-host=\"telegraf:$(nslookup jenkins.eng.qops.net|grep Server | awk \'{print $2}\')\" aom_test_container'
echo "Removing docker image and container"
sh script: 'docker rmi -f aom_test_container'
}
}
stage('Deploy') {
steps {
echo "No deploy step required for Merge Request"
}
}
}
post {
success {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test passed, update commit with green checkbox
}
// Notify Eng Viz of successful build
// slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
}
failure {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test failed, update commit status with red x
error("Build failed, check ${BUILD_URL} for details.")
}
// On failure send an email to Eng Vis
mail body: "Please check ${BUILD_URL} for details.",
subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
from: 'Jenkins',
to: 'eng-visibility@qualtrics.com'
// Finally send a warning message to Eng Vis slack channel
// slackSend color: 'warn', message: 'Failed Build: $BUILD_URL', channel: '#eng-invisibility'
}
}
}

12
AoM_Service/AoM_Configs/.gitignore vendored Executable file
View File

@@ -0,0 +1,12 @@
# ignore alert configs starting with underscore -- we can create them while testing the webapp
# and not have to worry about them getting into the repo
alert_configs/_*.yaml
*.swp
.idea/
.vagrant/
__pycache__
logs/
venv/
.vscode/

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env groovy
pipeline {
agent {label 'nomad-builder'}
environment {
DOCKER_HOST = 'tcp://127.0.0.1:2375'
WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
}
stages {
stage('Info') {
steps {
sh script: 'hostname'
echo "WORKSPACE_PATH: $WORKSPACE_PATH"
}
}
stage('Build') {
steps {
echo "No build required"
}
}
stage('Test') {
steps {
echo "Test done already on merge request"
//sh script: 'cd build; ./test_changed.sh "${WORKSPACE_PATH}"'
// sh script: 'cd build; ./test_changed.sh'
}
}
stage('Deploy') {
steps {
script {
if ("$GIT_BRANCH" == "origin/master"){
echo "Running publish script"
sh script: './publish.sh'
echo "Triggering Rundeck job"
script {
step([$class: 'RundeckNotifier', includeRundeckLogs: true, jobId: 'c1f0dd4e-89a0-411b-afbb-455421a2ba34', nodeFilters: '', options: '', rundeckInstance: 'team-rundeck -- techops', shouldFailTheBuild: true, shouldWaitForRundeckJob: true, tags: '', tailLog: false])
}
}
else {
echo "No deploy step required."
}
}
}
}
}
post {
success {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test passed, update commit with green checkbox
}
// Notify Eng Viz of successful build
// slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
}
failure {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test failed, update commit status with red x
error("Build failed, check ${BUILD_URL} for details.")
}
// On failure send an email to Eng Vis
mail body: "Please check ${BUILD_URL} for details.",
subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
from: 'Jenkins',
to: 'eng-visibility@qualtrics.com'
// Finally send a warning message to Eng Vis slack channel
slackSend color: 'warning', message: "Failed Build: $BUILD_URL", channel: '#eng-invisibility'
}
}
}

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env groovy
pipeline {
agent {label 'nomad-builder'}
environment {
DOCKER_HOST = 'tcp://127.0.0.1:2375'
WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
}
stages {
stage('Info') {
steps {
sh script: 'hostname'
echo "WORKSPACE_PATH: $WORKSPACE_PATH"
}
}
stage('Build') {
steps {
echo "No build required"
}
}
stage('Test') {
steps {
echo "Running test"
sh script: './test_changed.sh'
sh script: 'python validate_yaml.py'
}
}
stage('Deploy') {
steps {
echo "No deploy step required for Merge Request"
}
}
}
post {
success {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test passed, update commit with green checkbox
}
// Notify Eng Viz of successful build
// slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
}
failure {
gitlabCommitStatus(name: "$JOB_NAME") {
// Test failed, update commit status with red x
error("Build failed, check ${BUILD_URL} for details.")
}
// On failure send an email to Eng Vis
mail body: "Please check ${BUILD_URL} for details.",
subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
from: 'Jenkins',
to: 'eng-visibility@qualtrics.com'
// Finally send a warning message to Eng Vis slack channel
// slackSend color: 'warn', message: 'Failed Build: $BUILD_URL', channel: '#eng-invisibility'
}
}
}

View File

@@ -0,0 +1,16 @@
FROM registry-app.eng.qops.net:5001/imported/alpine:3.9
MAINTAINER Engineering Visibility <eng-visibility@qualtrics.com>
COPY webapp_requirements.txt /
COPY run_webapp.sh /
RUN apk add --no-cache python3 curl
RUN apk add --no-cache --virtual .build-deps build-base python3-dev \
&& pip3 install --no-cache-dir --upgrade pip \
&& pip3 install --no-cache-dir --upgrade setuptools \
&& pip3 install --no-cache-dir --upgrade -r /webapp_requirements.txt \
&& apk del .build-deps \
&& rm -rf /var/cache/apk/*
CMD ["/run_webapp.sh"]

236
AoM_Service/AoM_Configs/README.md Executable file
View File

@@ -0,0 +1,236 @@
# README
This is the new repository for the Alert On Metrics project configurations.
The Alert On Metrics (AOM) project allows you to set up alerts that trigger based on tracking a metric value collected via [Metrics as a Service](https://odo.corp.qualtrics.com/wiki/index.php/Metrics_As_A_Service). You "track" your metric via a [KairosDB query](http://kairosdb-metrics.service.eng.consul:8080/) or a [Prometheus query](http://big-trickster.service.eng.consul:9090/graph), so you are not limited to raw metrics: you can sample with the aggregators available in KairosDB to create new metric views, or use PromQL if you are using Prometheus. Typically people use min, max or count. All "tracked" metrics are rewritten to the metrics data store as a new metric *telegraf.aom_stats_value*, tagged by Alert-On-Metrics to show their origin.
You can trigger an alert based on any combination of the following:
- An upper critical threshold based on the value of a metric increasing
- An upper warning threshold based on the value of a metric increasing
- A lower critical threshold based on the value of a metric decreasing
- A lower warning threshold based on the value of a metric decreasing
- Combine any lower and upper threshold to create a 'band' (see the sketch below)
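For reference, here is a minimal sketch of how these threshold fields combine into a 'band' inside an alert configuration. The field names match the ones used throughout this repository; the values are illustrative only.
```yaml
# Illustrative values only: alert when the metric leaves the 10-100 band
critical_upper_threshold: 100.0
warning_upper_threshold: 80.0
warning_lower_threshold: 20.0
critical_lower_threshold: 10.0
```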
---
## Sensu and alert subdue. NEW!
Some changes have been introduced in the latest AOM versions. Alerts
can now be sent through Sensu (email is not supported yet). Using Sensu also
allows you to create check dependencies (`vo` is now `victorops` for Sensu).
```
alerts:
sensu:
victorops:
'blackhole'
slack:
'#aom_test_channel'
dependencies:
- name_of_check1
- name_of_check2
```
The filters option has also been enabled. It works the same way as in
Hiera. If you only want to receive critical alerts through one channel,
you can set `<channel>_subdue` to **true**.
Example:
```
filters:
slack_subdue: true
victorops_subdue: false
```
You can make use of anything the Sensu API supports: anything you add
to your configuration under `sensu` will be sent directly to the Sensu API.
---
## Availability metric.
If you want to track how long your check is in the CRITICAL state over a
given period of time, you can enable this feature by setting this
option to true:
```
availability: true
```
This will start sending metrics constantly and recording the check
output. You can then visualize this metric with the following
[dashboard](https://grafana.eng.qops.net/d/5OsrZSdiz/aom-availability?orgId=1)
(or you can create your own).
To get a more accurate result, don't set the refresh interval lower
than 60 seconds.
---
## Routing per tag value. NEW!
This feature allows you to configure a different alert routing using the values of tags in your metric. For instance, let's say you want to have a different alert policy for beta, gamma and prod:
* *beta*: I want to alert my `#my-project-dev` channel
* *gamma*: I want to alert my `#my-project-gamma` channel
* *prod*: I want to alert my `#my-project` channel and page the on-call on VictorOps
We can use the `dc` tag available in the metric query, define specific configuration for beta and gamma, and use a default one for all other values (prod in this case). Everything is configured inside the `alerts` object in the yaml configuration. Instead of directly adding the alert configuration, add a `lookup` key. Inside, you have to provide three values:
* `default`: the alert policy to apply by default if we can't find a configuration for a specific combination of tags. The format is the exact same as classic alerts (sensu, vo, slack, etc.).
* `tags`: the tags that will be used to lookup the alert routing configuration. You can use more than one tag.
* `lookups`: an array, where each element specifies a combination of tag values and the routing to apply in this case.
Here is the configuration of our example:
```yaml
alerts:
lookup:
default:
sensu:
slack: my-project
victorops: my-on-call-key
tags:
- dc
lookups:
-
alert:
sensu:
slack: my-project-dev
tags:
dc: b1-prv
-
alert:
sensu:
slack: my-project-gamma
tags:
dc: g1-iad
```
You can move the `lookups` part into a separate file, so it can be reused across different AOM configurations. To do that, instead of a `lookups` key, provide a `lookup_file` with the filename, including the extension:
```yaml
alerts:
lookup:
default: ...
lookup_file: my_lookup_file.yaml
tags: ...
```
Save this file under the `alert_routing_lookup` folder. The syntax for the alert routing is the same as before; it is just in a different file:
```yaml
---
-
alert:
sensu:
slack: my-project-dev
tags:
dc: b1-prv
-
alert:
sensu:
slack: my-project-gamma
tags:
dc: g1-iad
```
---
## How do I register a new alert with AOM?
Alert configurations for AOM are just a KairosDB or Prometheus query
specified in yaml format and wrapped in some controlling
configuration that determines how frequently the query is executed,
the thresholds, the occurrences, and where to route the alerts. We have built a
small UI, packaged with the AOM gitlab project, that will help
you generate a suitable yaml configuration. You can rehearse your
queries on the [KairosDB UI](http://kairosdb-metrics.service.eng.consul:8080/) or at any
Prometheus endpoint, and take a look at other examples in the alert_configs/ folder for help.
Follow the instructions below to launch the yaml generator UI on your
local desktop and use it to generate a merge request (Docker is
required).
1. Clone the project
2. cd into the project's directory
3. Run the script ./generate_config.sh
4. Once up, navigate in a browser to **localhost:80/**
5. Fill out the form and click generate
6. Hit **Ctrl+C** when you have the alert configuration
7. Submit the merge request in a new branch
---
This process starts a local webserver that provides a convenient interface for generating the yaml you need.
Most of the fields have helpful info tips on what each value is and how it's used.
---
## Visualization tool [BETA]
The project also ships a simple Python script that shows what your
metrics will look like and helps you set the thresholds. This tool
requires Python 3 and some additional Python 3 modules:
1. yaml
2. json
3. requests
4. numpy
5. matplotlib
These modules should be easy to install using 'pip' or 'homebrew'.
Usage:
```python3 show_config.py [X] alertname_without_yaml_extension```
Where X is an optional parameter that defines the interval length you
want to display. It's a multiplier factor, set to 10 by default, that
will increase the start_relative (so you will see more datapoints).
The script should open a window showing the metrics along with the defined
thresholds. If the query doesn't return any values, it will exit.
---
## How does my new alert get to production?
Once you submit a merge request, a Jenkins job will quickly validate your alert
files, checking only that they contain all required fields and proper syntax.
Setting appropriate thresholds and alerting channels (VictorOps, email,
Slack) is the user's responsibility.
If Jenkins returns a PASS result for the test, the new alert files will be
merged into the master branch and a deploy job will be triggered (also
from Jenkins). The AOM service actively watches the
alert_configs folder and will pick up any changes (by default every
300 seconds).
## Helpful Tidbits
__IMPORTANT:__ The alert `id` field must be unique; it can be useful to run
`grep` within the alert_configs directory (for example,
`grep -r "^id: my_alert_id" alert_configs/`, where `my_alert_id` is your chosen id) to make sure the id is not already defined.
Use the [UI](http://kairosdb-metrics.service.eng.consul:8080/) on the kairosdb box to help you generate / determine the proper query.
Remember, you want to get the query down to just one or two entries per *group-by* so that the service can quickly iterate over it.
Once the request has been merged you can check if your query is getting processed by [hitting the url](http://alert-on-metrics.service.eng.consul:8080/healthcheck?verbose=true)
You can also check out the [grafana dashboard](http://grafana-metrics.service.eng.consul:3000/dashboard/db/alert-on-metrics) that has the results of this service's queries and verify your alert metric is showing up regularly.
From KairosDB's docs: *You must specify either start_absolute or start_relative but not
both. Similarly, you may specify either end_absolute or end_relative
but not both. If either end time is not specified the current date and
time is assumed.* We suggest using *end_relative* (greater than
1 minute), as this makes for steadier graphs (if you draw a graph up to
*Now*, some of the latest metrics could be missing, so the end of the
graph will be lower than it should be).
We do not recommend using *align_sampling* and *align_start_time*
(both false by default, so they can be skipped) as they might change the alignment of metrics
and change graphs over time (*if more than one is set, unexpected results will occur*).
If you have any doubt about KairosDB's query metrics you can take a look at their documentation [here](https://kairosdb.github.io/docs/build/html/restapi/QueryMetrics.html).
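As a rough illustration of *start_relative*/*end_relative* (the metric name and tag value below are hypothetical; see the KairosDB documentation linked above for the authoritative query format), the time-window portion of a KairosDB `query` block could look something like this:
```yaml
query:
  start_relative:
    value: '10'
    unit: minutes
  end_relative:                    # end 1 minute ago rather than at "now" for steadier graphs
    value: '1'
    unit: minutes
  metrics:
    - name: my.example.metric      # hypothetical metric name
      tags:
        dc: ['b1-prv']             # illustrative tag value
      aggregators:
        - name: max
          sampling:
            value: 1
            unit: minutes
```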
---
## The Gotchas
1. Alerts only fire when KairosDB returns a result. If your KairosDB metric query returns no results for X (currently 10) attempts, any active alerts will clear with a message explaining that AOM could not get any further results from KairosDB, so the user must manually verify RECOVERY. Earlier versions of AOM had no flap protection like this built in. Long term we will move alerting to Sensu, which has more advanced built-in flap protection. You can reduce flapping of results by building your Kairos query well. Please talk to Engineering Visibility for help with this.
2. Metrics are only collected every 60 seconds, so an interval below that will automatically get bumped up to 60 seconds by the web-based config generation. Match the interval to how often the metric is collected and measured.
3. The Email field only requires a list of names, not the @qualtrics part, as it will only send to Qualtrics addresses using the internal-smtp1-app.eng.qops.net box (see the sketch after this list).
4. Email and Slack alerts fire once during an event. This way, if an outage were occurring, you wouldn't get flooded with emails and Slack alerts the entire time.
5. Email and Slack alerts can be helpful to share with the team so they are aware of what is happening.
6. Email and Slack alerts can be helpful when trying to work out your alerts before routing them to VictorOps.
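As a hedged sketch of gotcha 3 (the usernames below are hypothetical), only the local part of each address goes into the email list:
```yaml
alerts:
  email:
    - jdoe      # delivered to jdoe@qualtrics.com
    - asmith    # delivered to asmith@qualtrics.com
```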

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_engine_failing
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 24
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_engine_failing) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

View File

@@ -0,0 +1,18 @@
---
id: sleeper_agents_milleniumfalcon_fuellevel_low
service: fuel
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_fuellevel_low) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_lightspeed_unavailable
service: captain
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 48
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_lightspeed_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['core']

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_shields_unavailable
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
suppressed_occurrences_threshold: 54
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_shields_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

View File

@@ -0,0 +1,8 @@
---
-
alert:
slack:
- "public-api-deploy-tst"
tags:
canaryTest: transaction_import_distribution_1
targetdc: fra1

View File

@@ -0,0 +1,365 @@
---
-
alert:
sensu:
slack: es-qe-alerts
tags:
brandId: aexpfeedback
-
alert:
sensu:
slack: emea-alerts
victorops: profserv-19
tags:
brandId: airbuswea
-
alert:
sensu:
slack: es-alaskaair
tags:
brandId: alaskaair
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: amdocs
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: americanairlines
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: anz
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: arris
-
alert:
sensu:
slack: emea-alerts
victorops: profserv-19
tags:
brandId: baincx
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: bmocx
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwgroupne
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwgroupnest3
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwjapan
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwjapanst3
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwna
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwnast3
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwvertriebsgmbh
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: bmwvertriebsgmbhst3
-
alert:
sensu:
slack: caterpillar
victorops: profserv-14
tags:
brandId: catcustomerinsights
-
alert:
sensu:
slack: century-link
victorops: xmp-seattle-4
tags:
brandId: centurylink
-
alert:
sensu:
slack: xmp-seattle-4
victorops: xmp-seattle-4
tags:
brandId: ciscoengineering
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: clientdashboards
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: cms
-
alert:
sensu:
slack: TODO
tags:
brandId: cocacolaperform
-
alert:
sensu:
slack: dish
tags:
brandId: dishvoc
-
alert:
sensu:
slack: es-alerts
tags:
brandId: dowcorning
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: drtoddhall
-
alert:
sensu:
slack: es-gs-compare
victorops: xmp-seattle-3
tags:
brandId: goldmansachs
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: harvard
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: ibm
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: jcibuildings
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: johnsoncontrols2
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: kubota
-
alert:
sensu:
slack: liberty-mutual
tags:
brandId: libertymutualvoc
-
alert:
sensu:
slack: es-qe-alerts
victorops: es-bmw-marriott
tags:
brandId: marriottvacationclub
-
alert:
sensu:
slack: es-alerts
tags:
brandId: mastercard
-
alert:
sensu:
slack: xmp-seattle-4
victorops: xmp-seattle-4
tags:
brandId: nielsenapac
-
alert:
sensu:
slack: TODO
tags:
brandId: optumrx
-
alert:
sensu:
slack: xmp-seattle-4
victorops: xmp-seattle-4
tags:
brandId: nielsenscarborough
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: rogers
-
alert:
sensu:
slack: es-alerts
tags:
brandId: samsungeurope
-
alert:
sensu:
slack: emea-alerts
victorops: profserv-19
tags:
brandId: telenorreporting
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: thermoking
-
alert:
sensu:
slack: philips-es
tags:
brandId: tnsnipophilips
-
alert:
sensu:
slack: travelers_coord
victorops: profserv-14
tags:
brandId: travelers
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: uhcdr
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: uhcmr
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: uhcgm
-
alert:
sensu:
slack: TODO
tags:
brandId: uhg
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: uhg1
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: underarmour
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: unum
-
alert:
sensu:
slack: TODO
tags:
brandId: usaast3
-
alert:
sensu:
slack: xmp-sea-automations
tags:
brandId: usbank
-
alert:
sensu:
slack: es-alerts
victorops: profserv
tags:
brandId: uscd
-
alert:
sensu:
slack: xmp-seattle-3
victorops: xmp-seattle-3
tags:
brandId: walkersandbox

View File

@@ -0,0 +1,30 @@
#! /usr/bin/python3
# aom_builder.py
# The point of the builder is to generate a valid yaml config that can be read into the main app,
# by asking clarifying questions about what to check and how to alert on it.
# This comes down to 4 questions:
# When to query
# What to query for
# What's an alert
# Who to alert
from webapp import app
from library.logger import AlertLogging
from library.args import get_builder_args
log = AlertLogging('aom')
log.start()
log.start_log_file("logs/aom_builder.log")
if __name__ == "__main__":
# GET ARGS AND START LOGGING
args = get_builder_args()
# logger.init("logs/aom_builder.log", args['log_level'])
# aom_logger = logging.getLogger(__name__)
log.info("Logger Initialized")
# ENABLE SESSIONS TO KEEP YAML FILE STATE BETWEEN PAGES
log.info("Starting webapp")
app.run(host='localhost', port=args['port'], debug=True)

View File

@@ -0,0 +1,16 @@
#!/bin/bash
trap ctrl_c INT
function ctrl_c() {
docker stop aom_web
docker ps -a | awk '{ print $1,$2 }' | grep aom_web | awk '{print $1 }' | xargs -I {} docker rm {}
}
docker build -f Dockerfile.webapp -t aom_web . && \
docker run -d -v$(pwd):/web -p80:5000 --name aom_web aom_web && \
docker logs -f aom_web

View File

View File

@@ -0,0 +1,84 @@
# Contains the arg parser options.
import argparse
import sys
def get_builder_args():
"""
Gets the arguments passed in to the aom_builder main call
:return: parser object
"""
parser = argparse.ArgumentParser(description="Generates a valid yaml file for alerting on metrics. "
"If you are familiar with the yaml structure for an alert"
"you don't have to use this builder, it's just convenient")
parser.add_argument('-q', '--query', help="The KairosDB query string to use")
parser.add_argument('-i', '--interval', type=int, default=60, help="The interval that the check will run. "
"This value is in seconds")
parser.add_argument('-t', '--threshold', '--upperthreshold', help="The upper threshold is the value that when reached will cause an alert "
"depending on the threshold logic. "
"Use in conjunction with lower threshold to define a normal band.")
parser.add_argument('-b', '--lowerthreshold', help="The lower threshold is the value that when reached will cause an alert "
"depending on the threshold logic"
"Use in conjunction with upper threshold to define a normal band.")
parser.add_argument('-m', '--measure', choices=['gt', 'lt', 'eq'], help="The measure to use to compare the "
"threshold to the values of the alerts")
parser.add_argument('-a', '--alert_config', help='A valid Yaml representation of your alerting block')
parser.add_argument('-l', '--log_level', type=int, default=0, help="The log level for the aom_builder run. "
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")
return args_to_dict(parser)
def get_tester_service_args():
"""
Gets arguments passed into aom_tester.py
Returns: parser object
"""
parser = argparse.ArgumentParser(description="Parameters to start the alerting on metrics dummy tester service")
parser.add_argument('-l', '--log_level', type=int, default=0, help="The log level for the aom_service app "
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument('-a', '--alert_configs', default=None,
help="If provided will override the folder location read from the config with the value passed "
"in. Is helpful for testing and troubleshooting alerts")
parser.add_argument('--hostname', help="If provided, will override the actual hostname check with this value")
parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")
return args_to_dict(parser)
def get_service_args():
"""
Gets arguments passed into aom_service.py
Returns: parser object
"""
parser = argparse.ArgumentParser(description="Parameters to start the alerting on metrics service")
parser.add_argument('-l', '--log_level', type=int, default=0, help="The log level for the aom_service app "
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument('-a', '--alert_configs', default=None,
help="If provided will override the folder location read from the config with the value passed "
"in. Is helpful for testing and troubleshooting alerts")
parser.add_argument('-o', '--override', action='store_true', help="Overrides the check leader election value")
parser.add_argument('--hostname', help="If provided, will override the actual hostname check with this value")
parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")
return args_to_dict(parser)
def args_to_dict(parsed_args):
"""
Converts the argument parser object to a dict
Args:
parsed_args: Arg parser object
Returns:
Dictionary of arguments
"""
try:
arg_list = parsed_args.parse_args()
# RETURN A DICT OF ARGUMENTS
arg_dict = dict()
for val in vars(arg_list):
arg_dict[val] = getattr(arg_list, val)
return arg_dict
except argparse.ArgumentError:
parsed_args.print_help()
sys.exit(1)

View File

@@ -0,0 +1,22 @@
# config.py
import logging
import glob
import yaml
logger = logging.getLogger(__name__)
def glob_the_configs(config_path):
"""
Args:
config_path (string): relative path to the configs
Returns:
List of configs
"""
alert_list = []
for config_file in glob.glob(config_path + "/*.yaml"):
logger.debug("Found {} config".format(config_file))
# LOAD CONFIG
alert_list.append(yaml.load(open(config_file, 'rb').read()))
logger.info("Loaded {} configs".format(len(alert_list)))
return alert_list

View File

@@ -0,0 +1,118 @@
# logger.py
import logging
import logging.handlers
import os
logging.getLogger('requests').setLevel(logging.ERROR)
logging.getLogger('urllib3').setLevel(logging.ERROR)
logging.getLogger('werkzeug').setLevel(logging.ERROR)
class SingleLevelFilter(logging.Filter):
def __init__(self, passlevel, reject):
"""
Initializer (constructor) of the SingleLevelFilter
@param passlevel (int) - the int value of the level of the log
@param reject (bool) - if true will return if the record level is not equal to the passlevel
@return SingleLevelFilter object
@note Sets some object parameters
"""
self.passlevel = passlevel
self.reject = reject
def filter(self, record):
"""
Returns True/False depending on parameters
@param record (Log int) - the record that the filter belongs to
@return bool - True/False depending on what self.reject is set to and what record.levelno and self.passlevel are set to
@note This causes either only logging of the exact same level to get logged, or only logging other than the same level to get logged
"""
if self.reject:
return (record.levelno != self.passlevel)
else:
return (record.levelno == self.passlevel)
class AlertLogging(logging.Logger):
"""
Class Object to handle the logging of the alert on metrics service
starts at Error level and can flip on (and add) an additional log file and
Debug logger as needed.
"""
def __init__(self, name):
"""
Inits the formatters and logger
"""
self.name = name
self.debug_formatter = logging.Formatter(
"%(asctime)s - [%(levelname)s] - [%(module)s:%(lineno)d] - %(message)s", "%m-%d %H:%M:%S")
self.standard_formatter = logging.Formatter("%(asctime)s - [%(levelname)s] - %(message)s",
"%m-%d %H:%M:%S")
logging.getLogger()
logging.Logger.__init__(self, name, logging.DEBUG)
logging.setLoggerClass(AlertLogging)
def start(self):
"""
Returns:
"""
info_handler = logging.StreamHandler()
info_handler.setLevel(logging.INFO)
info_handler.setFormatter(self.standard_formatter)
self.addHandler(info_handler)
return self
def start_log_file(self, file_path, mode='a'):
"""
Creates a separate log file handler
Args:
file_path: path to the log file
mode: the type of mode to open the file handler with
Returns:
"""
self.log_path = file_path
work_folder = os.path.dirname(file_path)
if len(work_folder) > 0 and not os.path.exists(work_folder):
os.makedirs(work_folder)
self.log_handler = logging.FileHandler(file_path, mode)
self.log_handler.setLevel(logging.DEBUG)
self.log_handler.setFormatter(self.debug_formatter)
self.addHandler(self.log_handler)
def stop_log_file(self):
"""
Closes Log file and sets the handler to None
Returns:
"""
self.log_handler.close()
self.removeHandler(self.log_handler)
self.log_handler = None
def start_debug(self):
"""
Returns:
"""
self.debug_handler = logging.StreamHandler()
self.debug_handler.setLevel(logging.DEBUG)
self.debug_handler.addFilter(SingleLevelFilter(logging.DEBUG, False))
self.debug_handler.setFormatter(self.debug_formatter)
self.addHandler(self.debug_handler)
def stop_debug(self):
"""
stop the debugger
Returns:
"""
self.removeHandler(self.debug_handler)
self.debug_handler = None

View File

@@ -0,0 +1,42 @@
#!/bin/bash
GIT_COMMIT=$(git rev-parse HEAD)
if [[ $GIT_COMMIT == "" ]]; then
echo "--Missing required GIT_COMMIT var. Aborting..."
exit 1
fi
#Setup useful vars
team="engvis"
app="alert-on-metrics-configs"
registryV2="registry-app.eng.qops.net:5001"
pathV2="${registryV2}/${team}/${app}"
commitV2="${pathV2}:${GIT_COMMIT}"
latestV2="${pathV2}:latest"
# In case you use relative paths
DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $DIR
echo "--Publishing $app $GIT_COMMIT"
echo "--Removing old image, so they don't accumulate"
docker rmi $latestV2
#Now fail if anything doesn't work
set -e
if [ -f $app/build.sh ]
then
echo "--Running pre build steps"
$app/build.sh
fi
docker build --pull=true --tag="$commitV2" --tag "$latestV2" .
echo "--Publishing app container"
docker push $commitV2
docker push $latestV2

6
AoM_Service/AoM_Configs/run.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/sh
rsync -a --delete /alert_configs/ /mountpoint/configs/git/
rsync -a --delete /alert_routing_lookup/ /mountpoint/alert_routing_lookup/
ls -l /mountpoint/configs/git/

View File

@@ -0,0 +1,5 @@
#!/bin/ash
export FLASK_APP=/web/aom_webapp.py
export FLASK_DEBUG=1
cd /web; flask run --host=0.0.0.0

View File

@@ -0,0 +1,25 @@
#=======================#
# All them URLS and tokens
#=======================#
kairosdb_url: "http://kairosdb-metrics.service.eng.consul:8080/"
victorops_url: "https://alert.victorops.com/integrations/generic/20131114/alert/07f108fe-9183-45c3-a888-19e1432806c5/"
slack_url: "https://slack.com/api/chat.postMessage"
slack_token: "xoxb-76976722775-WY6vtKAk0SQEb8qcbFkLMV81"
smtp_server: "internal-smtp1-app.eng.qops.net:2525"
consul_url: "http://consul1-app.eng.qops.net:8500/v1/kv/service/alert-on-metrics/leader-lock"
sensu_endpoint: "https://sensu-api.eng.qops.net:443/results"
#=======================#
# Logging Information
#=======================#
log_path: "logs/aom_service.log"
#=======================#
# alerts folder
#=======================#
alert_folder: "alert_configs"
#=======================#
# request timeout value
#=======================#
timeout: 90

View File

@@ -0,0 +1,104 @@
import glob
import yaml
import json
import os
import sys
import time
import re
import requests
import numpy
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import random
import warnings
warnings.filterwarnings("ignore")
#from pdb import set_trace as bp
timeout = 180
# if no argument print help and exit
if len(sys.argv) == 1:
print("You need to specify an alert config file.")
exit(1)
#else
config_file = 'alert_configs/'+sys.argv[1]+'.yaml'
# test file exists or exit
alert_config = yaml.load(open(config_file, 'rb').read())
# We will show 10 intervals by default
if len(sys.argv) == 3:
interval = int(sys.argv[2])
else:
interval = 10
alert_config['query']['start_relative']['value'] = str(int(alert_config['query']['start_relative']['value'])*interval)
kairosdb_url = "http://kairosdb-metrics.service.eng.consul:8080/"
query_url = os.path.join(kairosdb_url + "api/v1/datapoints/query")
#ret = requests.post(query_url, data=json.dumps(query), timeout)
ret = requests.post(query_url, data=json.dumps(alert_config['query']), timeout=timeout)
results = ret.json()['queries'][0]['results']
# Transforming to human readable data
# for result in results[0]['values']:
# result[0] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(result[0]/1000))
# result[0] = datetime.datetime.strptime(result[0],'%Y-%m-%d %H:%M:%S')
for result in results:
for value in result['values']:
# bp()
# transform date from epoch to human readable format
value[0] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0]/1000))
# transform date string to datetime object
value[0] = datetime.datetime.strptime(value[0],'%Y-%m-%d %H:%M:%S')
series = numpy.array(result['values'])
label_str = str(result['group_by'][0].get('group', ''))
line_color = tuple(numpy.random.random(size=3))
plt.plot_date(series[:,0],series[:,1], marker='.', color=line_color, linestyle='-', label=label_str)
#series = numpy.array(results[0]['values'])
#converted_dates = map(datetime.datetime.strptime, datelist, len(datelist)*['%Y-%m-%d %H:%M:%S'])
#x_axis = (converted_dates)
formatter = mdates.DateFormatter('%H:%M:%S')
# ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
# series = series.astype(numpy.unicode, copy=False)
ax = plt.subplot()
#ax.set_xlabel('TIME')
#ax.set_ylabel('VALUE')
#bc = plt.axes()
#bc.xaxis.set_major_formatter(formatter)
#plt.plot_date(series[:,0],series[:,1], marker='o', color='b', linestyle='-')
#plt.plot_date(converted_dates,series[:,1], marker='o', color='b', linestyle='-')
#ax.set_xticks(series[:,0])
#ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
#ax.xaxis.set_minor_formatter(mdates.DateFormatter("%Y-%m"))
# ax = plt.subplot.gcf().axes[0]
#ax.set_title(sys.argv[1])
ax.xaxis.set_major_formatter(formatter)
#plt.xaxis.set_major_formatter(formatter)
plt.title(sys.argv[1])
plt.legend()
# pyplot.gcf().autofmt_xdate(rotation=25)
#ax.xaxis_date()
# ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
# ax.xaxis.set_minor_formatter(mdates.DateFormatter("%Y-%m"))
# ax.plot(series[:,0],series[:,1], marker='o', color='b', linestyle='-')
myRe = re.compile('^(?!occurrences).*_threshold$')
# Adding thresholds to the graph
for key in alert_config:
if myRe.match(key):
plt.axhline(y=float(alert_config[key]), color='r', linestyle='--', label=str(key))
plt.text(series[0][0],float(alert_config[key]),key)
#plt.gcf().autofmt_xdate()
#ax = .add_axes([0,0,1,1])
plt.gcf().autofmt_xdate(rotation=25)
#plt.axhline(y=500000, color='o', linestyle='-')
plt.show()
#results[0]['values']

View File

@@ -0,0 +1,30 @@
#!/bin/bash
set -x
if [ -z $GIT_COMMIT ]; then
echo "Expected env var 'GIT_COMMIT' to be set. Exiting..."
exit 1
fi
echo "Check that only alert confings are being pushed"
echo "$PWD"
for file in $(git diff-tree -r --name-only ${GIT_COMMIT}^1 ${GIT_COMMIT}); do
new_id=$(grep ^id\: $file)
if [ ! -z "$new_id" ]; then
total_id=$(grep "$new_id" alert_configs/*.yaml | wc -l)
if [ $total_id -gt 1 ] ; then
echo "Duplicated id found! Please update the id of the alert configuration"
exit 1
fi
fi
dir=$(dirname ${file})
# a change under alert_configs/ triggers a test of the new or changed alert configs
if [ "$dir" == "alert_configs" ] || [ "$dir" == "alert_routing_lookup" ] ; then
echo "Good to merge"
exit 0
else
echo "Only automatic merges allowed for alert config files"
exit 1
fi
done

View File

@@ -0,0 +1,60 @@
import yaml
import glob
if __name__ == "__main__":
alert_list = []
bad_alert_list = []
print("Collecting all yaml configs")
# COLLECT CONFIG FILES
for config_file in glob.glob("./alert_configs/*.yaml"):
print("Found {} config".format(config_file))
alert_list.append(config_file)
print("Collecting all yaml configs")
# PARSE CONFIG FILES AND VALIDATE THEIR VALUES
for alert in alert_list:
print("Validating file {}".format(alert))
try:
config = yaml.load(open(alert, 'rb').read())
assert len(config['alerts']) > 0, "No Alerts configured, this is a dead config"
assert len(config['query']) > 0, "No Query, this is a dead config"
assert config['interval'] >= 30, "Intervals less than 30 are invalid"
assert len(config['id']) > 0, "Alert ID is empty, this is a dead config"
if config.get('query_type') == 'prometheus':
assert type(config['query']) is str, "Invalid Prometheus query"
assert "$" not in config['query'], "Prometheus query should not contain variables"
else:
assert type(config['query']) is dict, "Kairosdb Query string cannot be validated as proper JSON"
defined_tags = set(config['query']['metrics'][0]['tags'].keys()).union({'','dc','fqdn'})
# IF THERE IS AGGREGATION WE HAVE TO ADD THESE TAGS
if 'group_by' in config['query']['metrics'][0]:
defined_tags.update(set(config['query']['metrics'][0]['group_by'][0]['tags']))
# for undefined_tag in set(config['tags']).difference(defined_tags):
# print("WARNING! {} tag is not defined on the query. Please make sure it does exist to "\
# "prevent empty results".format(undefined_tag))
# OUR MINIMUM THRESHOLD NEED
assert 'critical_lower_threshold' in config or 'critical_upper_threshold' in config or \
'warning_lower_threshold' in config or 'warning_upper_threshold' in config, \
"Config must have at least one threshold set."
# JUST MAKE SURE YOU ARE NOT DOING SOMETHING STUPID WITH WARNING COMING AFTER CRITICAL
if 'warning_lower_threshold' in config and 'critical_lower_threshold' in config:
assert config['critical_lower_threshold'] < config['warning_lower_threshold'], \
"Lower Critical must be less than Lower Warning"
if 'warning_upper_threshold' in config and 'critical_upper_threshold' in config:
assert config['critical_upper_threshold'] > config['warning_upper_threshold'], \
"Upper Critical must be greater than Upper Warning"
if 'occurrences_threshold' in config:
assert config['occurrences_threshold'] >= 1, \
"Having an occurrences value less than 2 is assumed and pointless to specify"
except Exception as e:
print("Invalid config file: {}\n{}".format(alert, str(e)))
bad_alert_list.append("{}\n{}".format(alert, str(e)))
# WRITE OUT BAD CONFIGS TO THE RESULTS FILE
# with open("./results/test_results.log", "w+") as f:
# for alert in bad_alert_list:
# f.write("Config is bad: {}".format(alert.replace('\n', ' ')))
for alert in bad_alert_list:
print("Config is bad: {}".format(alert.replace('\n', ' ')))
if bad_alert_list:
exit(1)

View File

@@ -0,0 +1,7 @@
from flask import Flask, render_template, request, session
app = Flask(__name__)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SECRET_KEY'] = 'super secret key'
import webapp.views

View File

@@ -0,0 +1,139 @@
import yaml
import os
import json
import traceback
import sys
from library.logger import AlertLogging
logger = AlertLogging('aom')
logger.start()
def render_config(config):
"""
Reads in the config dict and renders to file. config usually from web interface
Args:
config: The config to use to generate the yaml file
Returns:
boolean string of 0 if successful and the yaml as string, or 1 and the error
"""
try:
# GET THE NAME OF THE FILE FROM THE CONFIG
file_name = ''.join([config['alert_name'], '.yaml'])
logger.debug("Filename: {}".format(file_name))
# THIS SHOULD BE A PARAMETER PASSED IN
file_path = os.path.join('alert_configs', file_name)
logger.debug("Full path: {}".format(file_path))
# SANITIZE THE CONFIG TO A NEW OBJECT
yaml_config = {'alerts': {},
'id': config['alert_name'],
'interval': 30 if int(config['interval']) < 30 else int(config['interval'])}
# SET THE INTERVAL TO lowest value of 30 seconds
# SPLIT THE ALERTS INTO A LIST
if 'vo' in config:
yaml_config['alerts']['vo'] = [x for x in config['vo_list'].split(',') if x]
if 'email' in config:
yaml_config['alerts']['email'] = [x for x in config['email_list'].split(',') if x]
if 'slack' in config:
yaml_config['alerts']['slack'] = [x for x in config['slack_list'].split(',') if x]
# GET THRESHOLDS AS FLOATS
if 'critical_threshold' in config:
if config['critical_threshold'] != "":
yaml_config['critical_upper_threshold'] = float(config['critical_threshold'])
if 'critical_upper_threshold' in config:
if config['critical_upper_threshold'] != "":
yaml_config['critical_upper_threshold'] = float(config['critical_upper_threshold'])
if 'warning_threshold' in config:
yaml_config['warning_upper_threshold'] = float(config['warning_threshold'])
if 'warning_upper_threshold' in config:
yaml_config['warning_upper_threshold'] = float(config['warning_upper_threshold'])
if 'critical_lower_threshold' in config:
if config['critical_lower_threshold'] != "":
yaml_config['critical_lower_threshold'] = float(config['critical_lower_threshold'])
if 'warning_lower_threshold' in config:
yaml_config['warning_lower_threshold'] = float(config['warning_lower_threshold'])
if 'occurrences_threshold' in config:
yaml_config['occurrences_threshold'] = int(config['occurrences_threshold'])
# PARSE THE QUERY OUT INTO A DICT OBJECT
if config['prometheus_query']:
yaml_config['query_type'] = 'prometheus'
yaml_config['prometheus_url'] = config['prometheus_url']
yaml_config['query'] = config['prometheus_query']
yaml_config['start_time'] = config['start_time']
yaml_config['end_time'] = config['end_time']
else:
yaml_config['query_type'] = 'kairosdb'
yaml_config['query'] = json.loads(config['kairosdb_query'])
# GET THE TAGS, COMMA SEPARATED
tags = config['tags'].split(',')
yaml_config['tags'] = [x for x in tags if x]
# GET THE URL
yaml_config['url'] = config['url']
# WRITE TO FILE
yaml_str = yaml.dump(yaml_config, default_flow_style=False, explicit_start=True)
with open(file_path, 'w') as f:
f.write(yaml_str)
return 0, yaml_str
except json.decoder.JSONDecodeError:
return 1, "Query string is not valid json: {}".format(traceback.format_stack())
except Exception as e:
logger.error("Unable to render yaml config file to disk")
_, _, ex_traceback = sys.exc_info()
return 1, render_traceback(e, ex_traceback)
def render_yaml(alert_id):
"""
Reads in a yaml file into the config that the web expects.
Args:
alert_id: then name of the config
Returns:
Dictionary
"""
file_name = ''.join([alert_id, '.yaml'])
file_path = os.path.join('alert_configs', file_name)
config = yaml.load(open(file_path, 'r').read())
yaml_config = dict()
yaml_config['alert_name'] = config['id']
yaml_config['interval'] = config['interval']
if 'critical_threshold' in config:
yaml_config['critical_upper_threshold'] = config['critical_threshold']
if 'critical_upper_threshold' in config:
yaml_config['critical_upper_threshold'] = config['critical_upper_threshold']
if 'critical_lower_threshold' in config:
yaml_config['critical_lower_threshold'] = config['critical_lower_threshold']
if 'warning_threshold' in config:
yaml_config['warning_upper_threshold'] = config['warning_threshold']
if 'warning_upper_threshold' in config:
yaml_config['warning_upper_threshold'] = config['warning_upper_threshold']
if 'warning_lower_threshold' in config:
yaml_config['warning_lower_threshold'] = config['warning_lower_threshold']
if 'occurrences_threshold' in config:
yaml_config['occurrences_threshold'] = config['occurrences_threshold']
yaml_config['url'] = config['url']
if 'email' in config['alerts']:
yaml_config['email'] = 'on'
yaml_config['email_list'] = ','.join(config['alerts']['email'])
if 'vo' in config['alerts']:
yaml_config['vo'] = 'on'
yaml_config['vo_list'] = ','.join(config['alerts']['vo'])
if 'slack' in config['alerts']:
yaml_config['slack'] = 'on'
yaml_config['slack_list'] = ','.join(config['alerts']['slack'])
if 'tags' in config:
yaml_config['tags'] = ','.join(config['tags'])
if config.get('query_type') == 'prometheus':
yaml_config['prometheus_query'] = config['query']
yaml_config['prometheus_url'] = config['prometheus_url']
yaml_config['start_time'] = config['start_time']
yaml_config['end_time'] = config['end_time']
else:
yaml_config['kairosdb_query'] = json.dumps(config['query'], sort_keys=True, indent=4, separators=(',', ': '))
return yaml_config
def render_traceback(ex, ex_traceback):
tb_lines = traceback.format_exception(ex.__class__, ex, ex_traceback)
logger.exception("Exception")
return '\n'.join(tb_lines)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,29 @@
body { font-family: sans-serif; background: #eee; }
a, h1, h2 { color: #377BA8; }
h1, h2 { font-family: 'Georgia', serif; margin: 0; }
h1 { border-bottom: 2px solid #eee; }
h2 { font-size: 1.2em; }
.page { margin: 2em auto; width: 45em; border: 5px solid #ccc;
padding: 0.8em; background: white; }
.entries { list-style: none; margin: 0; padding: 0; }
.entries li { margin: 0.8em 1.2em; }
.entries li h2 { margin-left: -1em; }
.add-entry { font-size: 0.9em; border-bottom: 1px solid #ccc; }
.add-entry dl { font-weight: bold; }
.metanav { text-align: right; font-size: 0.8em; padding: 0.3em;
margin-bottom: 1em; background: #fafafa; }
.flash { background: #CEE5F5; padding: 0.5em;
border: 1px solid #AACBE2; }
.error { background: #F0D6D6; padding: 0.5em; }
/*.button { border-top: 2px solid #a3ceda;
border-left: 2px solid #a3ceda;
border-right: 2px solid #4f6267;
border-bottom: 2px solid #4F6267;
padding: 1px 20px !important;
font-size: 14px !important;
background-color: #CEE5F5;
font-weight: bold;
color: #2d525d; }
*/
.container { width: 500px; clear: both;}

View File

@@ -0,0 +1,28 @@
{% extends "header.html" %}
{% block body %}
<h2>Form Elements</h2><br />
<table>
{% for key, value in query.items() %}
<tr>
<th> {{ key }} </th>
<td> {{ value }} </td>
</tr>
{% endfor %}
</table><br/>
<p>
{{ query.alert_name }}
</p>
<h2>Rendered Config File</h2><br />
<p>{{ file_path }}</p>
<p>
{% for line in file_contents %}
<div>{{ line|safe }}</div>
{% endfor %}
</p>
<br />
<form action="{{ url_for('re_build', alert_id=query.alert_name) }}" id="re_build" method="post">
<p>
<input type="submit" id="submit" class="btn btn-primary" value="Return to Form?">
</p>
</form>
{% endblock %}

View File

@@ -0,0 +1,6 @@
{% extends "header.html" %}
{% block body %}
<h1>Error Rendering config:</h1>
<p>{{ message }}</p>
<p><a href="{{ url_for('index') }}">Return to Creation Page?</a></p>
{% endblock %}

View File

@@ -0,0 +1,67 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-COMPATIBLE" content="IE=edge">
<meta name="viewport" content="width=device-width, intial-scale=1">
<title>Alerting On Metrics Yaml Builder</title>
<link rel=stylesheet type=text/css href="{{ url_for('static', filename='bootstrap.min.css') }}">
<link rel="stylesheet" type=text/css href="{{ url_for('static', filename='style.css') }}">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
<script src="{{ url_for('static', filename='bootstrap.min.js') }}"></script>
<script type="text/javascript">
function dynInput(cbox) {
console.log(cbox)
if (cbox.checked) {
var input = document.createElement("input");
input.type = "text";
input.id = cbox.name + "_list";
input.name = cbox.name + "_list";
document.getElementById("insertinputs_" + cbox.name).appendChild(input);
} else {
document.getElementById(cbox.name + "_list").remove();
}
}
function dynEnable(cbox) {
console.log(cbox);
var theId = "#" + cbox.name + "_list";
console.log(theId);
if (cbox.checked){
$(theId)[0].disabled = false;
} else {
$(theId)[0].disabled = true;
}
}
function dynThreshold(cbox) {
var theId = "#" + cbox.name + "_threshold";
if (cbox.checked){
$(theId)[0].disabled = false;
} else {
$(theId)[0].disabled = true;
}
}
function forceLower(strInput){
strInput.value=strInput.value.toLowerCase().replace(" ","_");
}
function forceComma(strInput){
strInput.value=strInput.value.replace(" ",",");
}
function forcePositive(strInput){
if (parseInt(strInput.value) <= 1) {
strInput.value = 2
}
}
</script>
</head>
<body>
<div class=page>
{% block body %}{% endblock %}

View File

@@ -0,0 +1,966 @@
{% extends "header.html" %}
{% block body %}
<form action="{{url_for('index')}}" id="builder" method="post" class="form-horizontal">
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Alert Meta</h3>
</div>
</div>
<!-- Alert Name -->
<div class="form-group">
<div class="col-sm-4">
<label for="alert_name" class="control-label">Alert Name:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#alertidModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="alertidModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="myModalLabel">Alert Name</h4>
</div>
<div class="modal-body">
<p>The alert name acts as both the name of the .yaml file and the id for the alert. The
alert name becomes part of what shows up in the title / subject when an alert is
triggered</p>
<p>Picking an alert name that already exists will overwrite the .yaml configuration file so
be aware of what you choose</p>
<p>The Alert name is also how this alert will show up in VictorOps, Slack and Email
(depending on what options you choose for the alerting).</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="text" id="alert_name" class="form-control" name="alert_name" value="{{ alert_name }}"
onkeyup="return forceLower(this);">
</div>
</div>
<!-- Check Interval -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="interval">Check Interval: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#intervalModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="intervalModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="checkInterval">Check Interval</h4>
</div>
<div class="modal-body">
<p>The check interval is how often the check will run the query (in seconds) and measure the
results</p>
<p>Anything less than 30 seconds will automatically be bumped up
to 30 seconds. This is due to the fact that metrics are collected every 30 seconds, so
checking more often than this would just result in the same values returned from the
query
as nothing would have changed yet</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="number" id="interval" class="form-control" name="interval" value="{{ interval }}">
</div>
</div>
<!-- Upper Critical Threshold -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="criticalUpperThreshold">Upper Critical Threshold: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#criticalUpperThresholdModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="criticalUpperThresholdModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="criticalUpperThresholdTitle">Critical Threshold</h4>
</div>
<div class="modal-body">
                                <p>A floating point or integer value. When any result returned by the query exceeds this
                                    number, a critical alert will trigger.</p>
                                <p>Only critical alerts will also trigger email and Slack alerts (if set).</p>
                                <p>Your query should be simplified down to just one or two values per grouping (a start
                                    and end metric). The alerting system looks at every value per grouping and sends an
                                    alert if any of them crosses the threshold.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-sm-7">
<input type="number" class="form-control" id="criticalUpperThreshold" name="critical_upper_threshold"
value="{{ critical_upper_threshold }}"
step="0.01"
onkeypress="validate(event)">
</div>
</div>
<!-- Lower Critical Threshold -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="criticalLowerThreshold">Lower Critical Threshold: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#criticalLowerThresholdModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="criticalLowerThresholdModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="criticalLowerThresholdTitle">Lower Critical Threshold</h4>
</div>
<div class="modal-body">
                                <p>A floating point or integer value. When any result returned by the query drops below
                                    this number, a critical alert will trigger.</p>
                                <p>Only critical alerts will also trigger email and Slack alerts (if set).</p>
                                <p>Your query should be simplified down to just one or two values per grouping (a start
                                    and end metric). The alerting system looks at every value per grouping and sends an
                                    alert if any of them falls below the threshold.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-sm-7">
<input type="number" class="form-control" id="lower_criticalThreshold" name="critical_lower_threshold"
value="{{ critical_lower_threshold }}"
step="0.01"
onkeypress="validate(event)">
</div>
</div>
<!-- Upper Warning Threshold -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="warningUpperThreshold">Upper Warning Threshold: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#warningUpperThresholdModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="warningUpperThresholdModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="warningUpperThresholdTitle">Upper Warning Threshold</h4>
</div>
<div class="modal-body">
                                <p>A floating point or integer value. When any result returned by the query exceeds this
                                    number, a warning alert will trigger.</p>
                                <p>Warnings will not trigger email or Slack alerts (even if those are set).</p>
                                <p>Your query should be simplified down to just one or two values per grouping (a start
                                    and end metric). The alerting system looks at every value per grouping and sends an
                                    alert if any of them crosses the threshold.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if warning_upper_threshold %}
{% set warning_upper_checked='checked' %}
{% else %}
{% set warning_upper_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="warning_upper" id="warning_upper" aria-label="..." onclick="dynThreshold(this);" {{
warning_upper_checked }}>
</span>
<input type="number" name="warning_upper_threshold" class="form-control" id="warning_upper_threshold"
value="{{ warning_upper_threshold }}"
aria-label="..." step="0.01" {{ warning_upper_disabled }}>
</div>
</div>
</div>
<!-- Lower Warning Threshold -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="warningLowerThreshold">Lower Warning Threshold: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#warningLowerThresholdModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="warningLowerThresholdModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="warningLowerThresholdTitle">Lower Warning Threshold</h4>
</div>
<div class="modal-body">
                                <p>A floating point or integer value. When any result returned by the query drops below
                                    this number, a warning alert will trigger.</p>
                                <p>Warnings will not trigger email or Slack alerts (even if those are set).</p>
                                <p>Your query should be simplified down to just one or two values per grouping (a start
                                    and end metric). The alerting system looks at every value per grouping and sends an
                                    alert if any of them falls below the threshold.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if warning_lower_threshold %}
{% set warning_lower_checked='checked' %}
{% else %}
{% set warning_lower_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="warning_lower" id="warning_lower" aria-label="..." onclick="dynThreshold(this);" {{
warning_lower_checked }}>
</span>
<input type="number" name="warning_lower_threshold" class="form-control" id="warning_lower_threshold"
value="{{ warning_lower_threshold }}"
aria-label="..." step="0.01" {{ warning_lower_disabled }}>
</div>
</div>
</div>
<!-- Occurrences -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="occurrences_threshold">Frequency: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#occurrencesModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="occurrencesModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="occurrencesTitle">Frequency</h4>
</div>
<div class="modal-body">
                                <p>The occurrences value, when set, determines how many times the check has to exceed
                                    the threshold before an alert is triggered.</p>
                                <p>This is particularly useful for metrics that can be spiky and resolve quickly; using
                                    occurrences means you are only alerted when a spike is no longer a spike but a rate
                                    sustained over a period of time.</p>
                                <p>The comparison happens once every interval, so if your interval is 5 minutes with an
                                    occurrences value of 3, the threshold would have to be exceeded for 15 minutes
                                    before any alerts are sent out.</p>
                                <p>The occurrences value is optional. If it is not enabled, a single query result
                                    exceeding the threshold is enough to trigger alerts, so setting it to 1 and leaving
                                    it disabled behave the same way.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if occurrences_threshold and occurrences_threshold is number and occurrences_threshold > 1 %}
{% set occurrences_checked='checked' %}
{% else %}
{% set occurrences_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="occurrences" id="occurrences" aria-label="..."
onclick="dynThreshold(this);" {{
occurrences_checked }}>
</span>
<input type="number" name="occurrences_threshold" class="form-control" id="occurrences_threshold"
value="{{ occurrences_threshold }}"
aria-label="..." step="1" min="2" {{ occurrences_disabled }} onkeyup="return forcePositive(this);">
</div>
</div>
</div>
<!-- Tags -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="tags">Tags:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#tagsModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="tagsModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="tagsTitle">Tags</h4>
</div>
<div class="modal-body">
                                <p>A comma separated list of tags to include in the alert subject.</p>
                                <p>In the event of an alert, the tags are used to look up distinguishing information and
                                    include it as part of the alert.</p>
                                <p>For example, including the dc tag means that when an alert occurs, the alerting
                                    system will look up the dc value from the returned query results and include it in
                                    the alert subject.</p>
                                <p>These are the same tag values used to build KairosDB queries.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="text" name="tags" id="tags" class="form-control" value="{{ tags }}" ,
onkeyup="return forceComma(this);">
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Notifications</h3>
</div>
</div>
<!-- VictorOps Alerts -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="vo">VictorOps Alert:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#voModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="voModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="voTitle">Victor Ops Alert List</h4>
</div>
<div class="modal-body">
                            <p>A comma separated list of VictorOps routing keys.</p>
                            <p>In the event of an alert, the routing keys listed here will receive a VictorOps alert.</p>
                            <p>If the checkbox isn't selected, the values listed here are ignored when the .yaml config
                                is generated.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if vo=="on" %}
{% set vo_checked='checked' %}
{% else %}
{% set vo_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="vo" id="vo" aria-label="..." onclick="dynEnable(this);" {{ vo_checked
}}>
</span>
<input type="text" class="form-control" name="vo_list" id="vo_list" aria-label="..."
value="{{ vo_list }}" onkeyup="return forceComma(this);" {{ vo_disabled }}>
</div>
</div>
</div>
<!-- Email Alerts -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="email">Email Alert:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#emailModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="emailTitle">Email Alert List</h4>
</div>
<div class="modal-body">
                            <p>A comma separated list of email names to send alerts to.</p>
                            <p>In the event of an alert, the names listed here will receive an email alert.</p>
                            <p>The alerting system appends @qualtrics.com to the names listed here, so there is no need
                                to include the @domain; it's assumed all alerting emails go to a Qualtrics address.
                                The SMTP server can only send to @qualtrics addresses anyway.</p>
                            <p>For example, to email both netops and devops on an alert, enter <b>devops,netops</b> in
                                the text box.</p>
                            <p>If the checkbox isn't selected, the values listed here are ignored when the .yaml config
                                is generated.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if email=="on" %}
{% set email_checked='checked' %}
{% else %}
{% set email_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="email" id="email" aria-label="..." onclick="dynEnable(this);" {{
email_checked }}>
</span>
<input type="text" name="email_list" class="form-control" id="email_list"
value="{{ email_list }}"
aria-label="..." onkeyup="return forceComma(this);" {{ email_disabled }}>
</div>
</div>
</div>
<!-- Slack Alert List -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="slack">Slack Alert:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#slackModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="slackModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="slackTitle">Slack Alert List</h4>
</div>
<div class="modal-body">
                            <p>A comma separated list of Slack names to send alerts to.</p>
                            <p>In the event of an alert, the names listed here will receive a Slack alert from a
                                slackbot.</p>
                            <p>You must include @ for direct message alerts and # for channel alerts.</p>
                            <p>For example, if the DevOps team wanted to get an alert in Slack, the value in the text
                                box would be <b>#devops</b>. To also include a direct message, the value would be
                                <b>#devops,@codyc</b>.</p>
                            <p>Don't troll people by bombing their Slack with your metric alerts; it's unkind.</p>
                            <p>If the checkbox isn't selected, the values listed here are ignored when the .yaml config
                                is generated.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<span class="input-group-addon">
{% if slack=="on" %}
{% set slack_checked='checked' %}
{% else %}
{% set slack_disabled='disabled' %}
{% endif %}
<input type="checkbox" name="slack" id="slack" aria-label="..." onclick="dynEnable(this);" {{
slack_checked }}>
</span>
<span id="insertinputs_slack"></span>
<input type="text" name="slack_list" class="form-control" id="slack_list"
value="{{ slack_list }}"
aria-label="..." onkeyup="return forceComma(this);" {{ slack_disabled }}>
</div>
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Dashboard</h3>
</div>
</div>
<!-- Dashboard URL -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="query">Dashboard URL:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#dashboardModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="dashboardModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="dashboardTitle">Dashboard URL</h4>
</div>
<div class="modal-body">
                            <p>Most queries are based on a dashboard that already exists in Grafana.</p>
                            <p>By including the URL to that dashboard, the on-call engineer receiving the alert can
                                click the link in the alert and get a better picture of what this alert is and how it
                                relates to the datacenter.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-sm-7">
<input type="text" name="url" id="url" class="form-control" value="{{ url }}">
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Kairosdb Query</h3>
</div>
</div>
<!-- KairosDB Query -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="query">KariosDB Query:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#queryModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="queryModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="queryTitle">KariosDB Query</h4>
</div>
<div class="modal-body">
                            <p>Paste in the KairosDB query that you have already worked out.</p>
                            <p>You can build your query in the <a
                                    href="http://kairosdb-metrics.service.eng.consul:8080/" target="_blank">KairosDB UI
                                in eng</a>.</p>
                            <p>When building your query you will want to get the return values down to just 1 or 2
                                results per grouping. This can be done by sending the query through the MAX or MIN
                                aggregator (depending on your logic needs) as the last aggregator in the query.</p>
                            <p>You will also want to include a time offset; typically the start is 5 minutes back
                                (as in from 5 minutes ago to now), and setting the MAX aggregator to the same window is
                                usual.</p>
                            <p>Once your query is returning the results you expect, click the <b>Show Query</b> button
                                in the KairosDB UI and copy the result into this field.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-sm-7">
<textarea name="kairosdb_query" id="kairosdb_query" class="form-control" rows="12" cols="50">{{ kairosdb_query }}</textarea>
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Prometheus Query</h3>
</div>
</div>
<!-- Prometheus URL -->
<div class="form-group">
<div class="col-sm-4">
<label for="prometheus_url" class="control-label">Prometheus URL:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#prometheusurlModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="prometheusurlModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="myModalLabel">Prometheus URL</h4>
</div>
<div class="modal-body">
                            <p>The URL of the Prometheus server to query.</p>
<p>Shared, production Prometheus URLs are currently:
<ul>
<li>http://big-trickster.service.eng.consul:9090</li>
</ul>
</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="text" id="prometheus_url" class="form-control" name="prometheus_url" value="{{ prometheus_url }}"
onkeyup="return forceLower(this);">
</div>
</div>
<!-- Prometheus Query -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="query">Prometheus Query:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#prometheusQueryModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="prometheusQueryModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="queryTitle">Prometheus Query</h4>
</div>
<div class="modal-body">
                            <p>Paste in the Prometheus query that you have already worked out.</p>
                            <p>You can build your query in the UI of your Prometheus endpoint. Eng Vis plans to add a
                                smart router in the future so all instances are exposed via a single smart proxy, but
                                for now you'll need to know the name.</p>
                            <p><a href="http://big-trickster.service.eng.consul:9090/graph" target="_blank">Prometheus
                                Host Metrics UI in eng</a></p>
                            <p><a href="http://big-trickster.service.eng.consul:9090/graph" target="_blank">Prometheus
                                StatsD and other Metrics UI in eng</a></p>
                            <p>When creating a query, keep in mind that a single returned value is the most useful, so
                                something like "topk(1, yourmetrics)" is a good choice. If your query has multiple
                                return values, AOM will use the last value.</p>
                            <p>For example, with a step/duration of 60 and a timespan of 300 between start and end you
                                will get back 5 values and the last one will be used.</p>
<p><a href="https://prometheus.io/docs/prometheus/latest/querying/functions/" target="_blank">Prometheus Functions</a></p>
<p>
<a href="https://prometheus.io/docs/prometheus/latest/querying/operators/" target="_blank">Prometheus Operators</a>
</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-sm-7">
<textarea name="prometheus_query" id="prometheus_query" class="form-control" rows="12" cols="50">{{ prometheus_query }}</textarea>
</div>
</div>
<!-- Start Time -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="start_time">Start Time: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#startTimeModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="startTimeModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="startTime">Start Time</h4>
</div>
<div class="modal-body">
                            <p>This should be a relative time in seconds, like '-600' for 10 minutes; it defaults to '-300'.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="text" id="start_time" class="form-control" name="start_time" value="{{ start_time }}">
</div>
</div>
<!-- End Time -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="end_time">End Time: </label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#endTimeModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="endTimeModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="endTime">End Time</h4>
</div>
<div class="modal-body">
<p>This can be 'now' (default) or some relative offset like '-30' in seconds</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="text" id="end_time" class="form-control" name="end_time" value="{{ end_time }}">
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h3 class="text-center">Actions</h3>
</div>
</div>
<!-- Load Config File -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="loadFile">Load Config From File:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#loadModal">info
</button>
<!-- Modal -->
<div class="modal fade" id="loadModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="loadTitle">Load Config from file</h4>
</div>
<div class="modal-body">
                            <p>Load a config that has already been generated to file back into the UI.</p>
                            <p>This is handy when you need to make minor changes to a query, add additional alerting
                                values or change thresholds. Or if you are just terrified of yaml.</p>
                            <p>Hit the drop down to see a list of all alert configs (the names generated from the
                                values used in the Alert Name field), hit Go, and the config will load into all the
                                fields.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<div class="input-group">
<select name="loadFile" id="loadFile" class="form-control">
<option value="" selected></option>
{% for f in alert_list %}
<option value="{{ f }}">{{ f }}</option>
{% endfor %}
</select>
<span class="input-group-btn">
<input type="submit" name="generate" id="submitFiles" class="btn btn-primary" value="Go">
</span>
</div>
</div>
</div>
<!-- Submit Form -->
<div class="form-group">
<div class="col-sm-4">
<label class="control-label" for="submit">Generate YAML:</label>
</div>
<div class="col-sm-1">
<!-- Button trigger modal -->
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#generateModal">
info
</button>
<!-- Modal -->
<div class="modal fade" id="generateModal" tabindex="-1" role="dialog"
aria-labelledby="myModalLabel">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h4 class="modal-title" id="generateTitle">Generate Alert Config</h4>
</div>
<div class="modal-body">
                            <p>When you are ready to take the values in the form and generate an alert config .yaml
                                file, hit the button.</p>
                            <p>This will generate a .yaml file named after the alert name. For example, an alert name
                                of <b>mcp_errors_per_dc</b> produces the file <b>mcp_errors_per_dc.yaml</b>.</p>
                            <p>This <b>will</b> overwrite a .yaml file if the alert name matches an already existing
                                file.</p>
                            <p>If there are any errors generating the config, the resulting page will include the error
                                message and let you return to this page with your form values preserved.</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
</div>
</div>
</div>
</div>
</div>
<div class="col-md-7">
<input type="submit" id="submit" name='generate' class='btn btn-primary' value="generate"
class="button">
</div>
</div>
</form>
{% endblock %}

View File

@@ -0,0 +1,4 @@
{% extends "header.html" %}
{% block body %}
<h2>Complete all values in the form below</h2>
{% endblock %}

View File

@@ -0,0 +1,69 @@
# views.py
import glob
import json
import os
import yaml
from flask import session
from library.logger import AlertLogging
from webapp import app, render_template, request, render
logger = AlertLogging('aom')
logger.start()
logger.start_log_file("logs/aom_service.log")
@app.route('/', methods=['GET', 'POST'])
def index():
logger.debug("Request Method: {}".format(request.method))
if request.method == 'GET':
# GET BLOB OF FILES
service_config = yaml.safe_load(open('service.yaml', 'r').read())
alert_list = sorted([os.path.splitext(os.path.basename(x))[0] for x in
glob.glob(service_config['alert_folder'] + "/*.yaml")])
if 'yaml_config' in session:
return render_template('index.html', **json.loads(session['yaml_config']), alert_list=alert_list)
else:
return render_template('index.html', alert_list=alert_list)
elif request.method == 'POST':
logger.info("Got a form")
if 'go' in request.form['generate'].lower():
return re_build(request.form['loadFile'])
yaml_config = dict()
ret = ''
try:
for field_name, value in request.form.items():
yaml_config[field_name] = value
code, ret = render.render_config(yaml_config)
assert code == 0
return render_template('debug.html', query=yaml_config,
file_path='alert_configs/{}.yaml'.format(yaml_config['alert_name']),
file_contents=ret.split('\n'))
except AssertionError:
session['yaml_config'] = json.dumps(yaml_config)
return render_template('error.html', message="Failed to render to file: {}".format(ret))
except Exception as e:
return render_template('error.html', message=str(e))
@app.route('/build/<alert_id>', methods=['POST'])
def re_build(alert_id):
# READ IN CONFIG FROM ID
config = render.render_yaml(alert_id)
service_config = yaml.safe_load(open('service.yaml', 'r').read())
alert_list = sorted([os.path.splitext(os.path.basename(x))[0] for x in
glob.glob(service_config['alert_folder'] + "/*.yaml")])
return render_template('index.html', **config, alert_list=alert_list)
@app.route("/debug/")
def toggle_debug():
if logger.debug_handler:
logger.stop_debug()
logger.info("Debug Stopped")
else:
logger.start_debug()
logger.debug("Debug Started")
return index()

View File

@@ -0,0 +1,3 @@
requests
pyaml
flask

8
AoM_Service/README.md Executable file
View File

@@ -0,0 +1,8 @@
# IMPORTANT NOTICE:
Alert configurations have been moved to
[AlertOnMetrics](https://gitlab-app.eng.qops.net/engvis/AlertOnMetricsConfigs).
This will allow more flexibility for the project. Merge requests will
be automatically validated, merged and deployed if they pass the
validation stage.

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_engine_failing
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 24
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_engine_failing) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

View File

@@ -0,0 +1,18 @@
---
id: sleeper_agents_milleniumfalcon_fuellevel_low
service: fuel
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_fuellevel_low) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_lightspeed_unavailable
service: captain
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 48
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_lightspeed_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['core']

View File

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_shields_unavailable
service: core
alerts:
slack:
- '#breel_testing_alerts'
vo:
- gobs-mm
critical_upper_threshold: 1.0
interval: 5
suppressed_occurrences_threshold: 54
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_shields_unavailable) by (dc)
tags:
- dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

81
AoM_Service/aom_service.py Executable file
View File

@@ -0,0 +1,81 @@
#!/usr/bin/python3
""" Alert On Metrics Project"""
import logging
import multiprocessing
import json
import base64
import os
import subprocess
from time import time, sleep
import requests
import yaml
from sanic import Sanic, response
from library.args import get_service_args
from library.config import glob_the_configs
from library.logger import AlertLogging
from library.service import Service
LOG = AlertLogging('aom')
LOG.start()
LOG.start_log_file("logs/aom_service.log")
APP = Sanic()
SERVICE_JOB = multiprocessing.Value('i', 0)
NUM_JOBS = multiprocessing.Value('i', 0)
LEADERSHIP = multiprocessing.Value('i', 0)
LEADER_STATUS = None
LEADER_TIME = None
CONSUL_URL = None
LEADER_OVERRIDE = None
HOSTNAME = None
SERVICE_CONFIG = None
@APP.route("/")
async def index(_):
"""
Return total number of jobs
"""
global NUM_JOBS
return response.json({"job_count": NUM_JOBS.value})
@APP.route('/healthcheck')
async def health(request):
"""
Healthcheck endpoint so that Consul and friends see this as a healthy service
Returns:
json object of status: ok
"""
LOG.debug("healthcheck")
service_process = multiprocessing.Process(target=start_service, \
args=(LOG, SERVICE_CONFIG['alert_reload_interval']), \
name="service", daemon=False)
# TRY TO START SERVICE, IF LEADER AND NOT RUNNING
if SERVICE_JOB.value == 0:
LOG.info("Starting alerts background job")
SERVICE_JOB.value += 1
service_process.start()
return response.json({"status": "ok"}, 200)
def start_service(log, reload_interval):
s = Service(log, reload_interval, HOSTNAME, SERVICE_CONFIG)
s.start()
if __name__ == "__main__":
# GET ARGS AND START LOGGING
ARGS = get_service_args()
logging.setLoggerClass(AlertLogging)
LOG.info("Starting Service")
# GET SERVICE CONFIG
LEADER_OVERRIDE = ARGS['override']
HOSTNAME = ARGS['hostname']
SERVICE_CONFIG = yaml.safe_load(open('service.yaml', 'r').read())
if ARGS['alert_configs'] is not None:
SERVICE_CONFIG['alert_folder'] = ARGS['alert_configs']
if ARGS['alert_routing_lookup'] is not None:
SERVICE_CONFIG['alert_routing_lookup'] = ARGS['alert_routing_lookup']
# SET CONSUL URL FOR LEADER CHECK
CONSUL_URL = SERVICE_CONFIG['consul_url']
# START THE MAIN SERVICE
APP.run(host="0.0.0.0", port=ARGS['port'])

121
AoM_Service/aom_test.py Executable file
View File

@@ -0,0 +1,121 @@
import json
import time
import requests
import yaml
service_config = yaml.safe_load(open('service.yaml', 'r').read())
kairos_url = service_config['kairosdb_url'] + "api/v1/datapoints/"
kairos_query = kairos_url + "query"
metrics_list = []
status1 = "RECOVERY"
status2 = "WARNING"
status3 = "CRITICAL"
json_string1 = """{"name": "aom_test_metric","datapoints": """
json_string2 = ""","tags": {"host": "aom_host","data_center": "AOM"},"ttl": 500}"""
# WRITE ALERT CONFIG FILE
alert_file = {'alerts': {'sensu': {'slack': 'aom_test_channel'}},
'critical_lower_threshold': 100,
'critical_upper_threshold': 5000,
'id': 'test_metric',
'interval': 30,
'occurrences_threshold': 1,
'query': {'cache_time': 0,
'end_relative': {'unit': 'seconds', 'value': '30'},
'metrics': [{'name': 'aom_test_metric', 'tags': {}}],
'start_relative': {'unit': 'seconds', 'value': '60'}},
'tags': {},
'url': 'AOM_TESTING',
'warning_lower_threshold': 1000,
'warning_upper_threshold': 2000}
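# The datapoints pushed below deliberately cross the thresholds above: values below
# 100 or above 5000 should register as CRITICAL, values between 100-1000 or
# 2000-5000 as WARNING, and values inside the 1000-2000 band as RECOVERY.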
query_intro = """{
"metrics": [
{
"tags": {
"alert": [
"test_metric"
]
},
"name": "telegraf.aom_"""
query_outro = """_value",
"aggregators": [
{
"name": "sum",
"align_sampling": true,
"sampling": {
"value": "9",
"unit": "minutes"
},
"align_start_time": false
}
]
}
],
"cache_time": 0,
"start_relative": {
"value": "8",
"unit": "minutes"
}
}"""
def main():
# noinspection PyBroadException
try:
with open('alert_configs/test.yaml', 'w') as yaml_file:
yaml.dump(alert_file, yaml_file, default_flow_style=False)
except Exception:
print("Error writing alert config file")
return False
now = int(time.time() * 1000)
metrics_list.append([now, 1501])
now += 32000
metrics_list.append([now, 202])
now += 32000
metrics_list.append([now, 23])
now += 32000
metrics_list.append([now, 1504])
now += 32000
metrics_list.append([now, 2005])
now += 32000
metrics_list.append([now, 5006])
now += 32000
metrics_list.append([now, 1507])
full_string = json_string1 + str(metrics_list) + json_string2
try:
ret = requests.post(kairos_url, data=json.dumps(json.loads(full_string)), timeout=200)
assert ret.status_code == 204, "Wrong status code received from KairosDB"
except AssertionError as e:
print("Error: {}".format(str(e)))
except Exception as e:
print("Problem talking to KairosDB: {}".format(str(e)))
return False
print("Metrics sent to KairosDB. Check alerts in the #aom_test_channel in Slack")
time.sleep(360)
try:
ret = requests.post(kairos_query, data=json.dumps(json.loads(query_intro + status1 + query_outro)), timeout=200)
print("Recovery {}".format(dict(ret.json())['queries'][0]['results'][0]['values'][0][1]))
assert dict(ret.json())['queries'][0]['results'][0]['values'][0][1] == 2, "Wrong RECOVERY result"
ret = requests.post(kairos_query, data=json.dumps(json.loads(query_intro + status2 + query_outro)), timeout=200)
print("Warning {}".format(dict(ret.json())['queries'][0]['results'][0]['values'][0][1]))
assert dict(ret.json())['queries'][0]['results'][0]['values'][0][1] == 2, "Wrong WARNING result"
ret = requests.post(kairos_query, data=json.dumps(json.loads(query_intro + status3 + query_outro)), timeout=200)
print("Critical {}".format(dict(ret.json())['queries'][0]['results'][0]['values'][0][1]))
assert dict(ret.json())['queries'][0]['results'][0]['values'][0][1] == 4, "Wrong CRITICAL result"
except AssertionError as e:
print("Error: {}".format(str(e)))
except Exception as e:
print("Problem getting results from KairosDB: {}".format(str(e)))
return False
return True
if __name__ == '__main__':
main()

View File

View File

@@ -0,0 +1,66 @@
class Alert_Config():
def __init__(self, yaml_config) :
if not 'alert_tags' in yaml_config :
yaml_config['alert_tags'] = {}
self.id = str(yaml_config['id'])
self.yaml_config = yaml_config
self.tags = {}
self.state = {}
def type(self) :
if 'type' in self.yaml_config :
return self.yaml_config['type']
return 'kairos'
def get_tags(self) :
if 'tags' in self.yaml_config :
return self.yaml_config['tags']
return []
def occurrences(self) :
if 'occurrences_threshold' in self.yaml_config :
return self.yaml_config['occurrences_threshold']
return 1
def url(self) :
if 'url' in self.yaml_config :
return self.yaml_config['url']
from os import environ
return environ['AOM_GRAFANA_URL'] + self.id
def get_level(self, key) :
if not key in self.state :
self.state[key] = None
return self.state[key]
def set_level(self, key, value) :
self.state[key] = value
def get_for_tags(self, key) :
if not key in self.tags :
self.tags[key] = 0
return self.tags[key]
def set_for_tags(self, key, value) :
if not key in self.tags :
self.tags[key] = 0
self.tags[key] = value
def init_for_tags(self, key) :
for k in [key, key+"_count"] :
if not k in self.tags :
self.set_for_tags(k, 0)
self.set_for_tags(key+"_noresult", 0)
def get_threshold(self, isUpper, isWarning) :
if isUpper and isWarning :
return self.try_get_yaml_config('warning_upper_threshold')
if isUpper and not isWarning :
return self.try_get_yaml_config('critical_upper_threshold')
elif not isUpper and isWarning :
return self.try_get_yaml_config('warning_lower_threshold')
elif not isUpper and not isWarning :
return self.try_get_yaml_config('critical_lower_threshold')
def try_get_yaml_config(self, key) :
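# Returns a (value, found) pair: the configured value (or None) and whether the
# key exists in the yaml config at all.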
return self.yaml_config[key] if key in self.yaml_config else None, key in self.yaml_config
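def __eq__(self, other) :
# A minimal value-equality addition (assumption: Alert_Config_List.compare()
# should treat configs with identical yaml as unchanged so their jobs are not
# needlessly restarted on every reload)
return isinstance(other, Alert_Config) and self.yaml_config == other.yaml_config
def __ne__(self, other) :
return not self.__eq__(other)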

View File

@@ -0,0 +1,36 @@
from alert_config import Alert_Config
class Alert_Config_List() :
def __init__(self, alert_configs=None) :
self.hash = {}
if alert_configs :
self.add(alert_configs)
def __getitem__(self, k) :
return self.hash[k]
def __len__(self) :
return len(self.hash)
def add(self, alert_config) :
if isinstance(alert_config, Alert_Config):
self.hash[alert_config.id] = alert_config
elif isinstance(alert_config, list) :
for a in alert_config :
self.add(a)
elif isinstance(alert_config, Alert_Config_List) :
for k in alert_config.hash :
self.add(alert_config.hash[k])
else :
raise Exception("unexpected type added to Alert_Config_List")
def compare(self, other) :
if not other :
other = Alert_Config_List()
self_keys = self.hash.keys()
other_keys = other.hash.keys()
added = other_keys - self_keys
removed = self_keys - other_keys
intersection = [i for i in self_keys if i in other_keys]
modified = [ i for i in intersection if self[i] != other[i] ]
return set(added), set(removed), set(modified)
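# A minimal usage sketch (not part of the service): build two lists from bare
# yaml dicts and diff them; only the 'id' key is required by Alert_Config here.
if __name__ == "__main__":
    old = Alert_Config_List([Alert_Config({'id': 'a'}), Alert_Config({'id': 'b'})])
    new = Alert_Config_List([Alert_Config({'id': 'b'}), Alert_Config({'id': 'c'})])
    added, removed, modified = old.compare(new)
    # Expect added == {'c'} and removed == {'a'}; 'b' shows up in modified
    # unless Alert_Config defines value equality.
    print(added, removed, modified)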

163
AoM_Service/library/args.py Executable file
View File

@@ -0,0 +1,163 @@
"""Contains the arg parser options."""
import argparse
import sys
def get_builder_args():
"""
Gets the arguments passed in to the aom_builder main call
:return: parser object
"""
parser = argparse.ArgumentParser(
description="Generates a valid yaml file "
"for alerting on metrics. If you are "
"familiar with the yaml structure for an "
"alert you don't have to use this builder,"
" it's just convenient")
parser.add_argument('-q', '--query', help="The Kariosdb query string to "
"use")
parser.add_argument(
'-i', '--interval', type=int, default=60, help="The "
"interval that the check will This value is in seconds")
parser.add_argument('-t', '--threshold', '--upperthreshold', help="The "
"upper threshold is the value that when reached will "
"cause an depending on the threshold logic. "
"Use in conjunction with lower threshold to define a "
"normal band.")
parser.add_argument(
'-b',
'--lowerthreshold',
help="The lower threshold is the value that when reached will cause an "
"alert depending on the threshold logic"
"Use in conjunction with upper threshold to define a normal band.")
parser.add_argument(
'-m',
'--measure',
choices=[
'gt',
'lt',
'eq'],
help="The measure to use to compare the "
"threshold to the values of the alerts")
parser.add_argument(
'-a',
'--alert_config',
help='A valid Yaml representation of your alerting block')
parser.add_argument(
'-l',
'--log_level',
type=int,
default=0,
help="The log level for the aom_builder run. "
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument(
'-p',
'--port',
type=int,
default=8080,
help="The port to run the webapp on")
return args_to_dict(parser)
def get_tester_service_args():
"""
Gets arguments passed into aom_tester.py
Returns: parser object
"""
parser = argparse.ArgumentParser(
description="Parameters to start the alerting on metrics dummy tester "
"service")
parser.add_argument(
'-l',
'--log_level',
type=int,
default=0,
help="The log level for the aom_service app"
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument(
'-a',
'--alert_configs',
default=None,
help="If provided will override the folder location read from the "
"config with the value passed in. Is helpful for testing and "
"troubleshooting alerts")
parser.add_argument(
'--hostname',
help="If provided, will override the actual hostname check with this "
"value")
parser.add_argument(
'-p',
'--port',
type=int,
default=8080,
help="The port to run the webapp on")
return args_to_dict(parser)
def get_service_args():
"""
Gets arguments passed into aom_service.py
Returns: parser object
"""
parser = argparse.ArgumentParser(
description="Parameters to start the alerting on metrics service")
parser.add_argument(
'-l',
'--log_level',
type=int,
default=0,
help="The log level for the aom_service app"
"[0=Error, 1=Info, 2=Debug]")
parser.add_argument(
'-a',
'--alert_configs',
default=None,
help="If provided will override the folder location read from the "
"config with the value passed in. Is helpful for testing and "
"troubleshooting alerts")
parser.add_argument(
'--alert_routing_lookup',
default=None,
help="If provided will override the folder used to fetch the alerts "
"lookup configuration.")
parser.add_argument(
'-o',
'--override',
action='store_true',
help="Overrides the check leader election value")
parser.add_argument(
'--hostname',
help="If provided, will override the actual hostname check with this "
"value")
parser.add_argument(
'-p',
'--port',
type=int,
default=8080,
help="The port to run the webapp on")
return args_to_dict(parser)
def args_to_dict(parsed_args):
"""
Converts the argument parser object to a dict
Args:
parsed_args: Arg parser object
Returns:
Dictionary of arguments
"""
try:
arg_list = parsed_args.parse_args()
# RETURN A DICT OF ARGUMENTS
arg_dict = dict()
for val in vars(arg_list):
arg_dict[val] = getattr(arg_list, val)
return arg_dict
except argparse.ArgumentError:
parsed_args.print_help()
sys.exit(1)

226
AoM_Service/library/config.py Executable file
View File

@@ -0,0 +1,226 @@
# config.py
"""Functions for loading alert configuration files"""
import glob
import os
import json
import hashlib
import yaml
import requests
from serviceapp import service
# import logging
# logger = logging.getLogger(__name__)
def md5(fname):
"""Calculates md5 hash of a filename"""
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def get_healthy_nodes_and_index(consul_url, hostname, logger):
"""Find AOM healthy nodes on consult"""
host_index = -1
healthy_nodes = []
try:
# getting all registered nodes from consul
r = requests.get(
consul_url +
'/v1/catalog/service/alert-on-metrics',
timeout=60)
assert r.status_code == 200, "Failed to get back a 200 from consul catalog"
value = json.loads(r.text)
node_list = []
host_index = -1
for elem in value:
node_list.append(elem.get('Node'))
# Retrieving healthy nodes
healthy_nodes = []
for node in node_list:
r2 = requests.get(
consul_url +
'/v1/health/node/' +
node,
timeout=60)
assert r.status_code == 200, "Failed to get back a 200 from consul health"
healthcheck_list = json.loads(r2.text)
for check in healthcheck_list:
if (check.get('CheckID') == 'check_healthcheck_alert-on-metrics_alert-on-metrics' and
check.get('Status') == 'passing'):
healthy_nodes.append(node)
try:
healthy_nodes.sort()
host_index = healthy_nodes.index(hostname)
except ValueError:
logger.error("Host is not healthy")
except (TimeoutError, requests.exceptions.RequestException):
logger.error("Timed out or failed connecting to Consul")
return host_index, len(healthy_nodes)
def distribute_configs(
filename,
host_index,
module,
logger):
"""Uses md5 of alert config to split the files among healthy servers"""
if module == 0:
logger.error("No healthy nodes for the service")
return False
if host_index == -1:
logger.error("Host is unhealthy")
return False
if int(md5(filename), 16) % module == host_index:
return True
return False
def is_valid(alert_config, logger):
"""Checks if alert has all required fields"""
try:
assert alert_config['alerts'], "No Alerts configured, this is a dead config"
assert alert_config['query'], "No Query, this is a dead config"
assert alert_config['interval'] >= 30, "Intervals less than 30 are invalid"
assert alert_config['id'], "Alert ID is empty, this is a dead config"
if alert_config.get('query_type') == 'prometheus':
assert isinstance(
alert_config['query'], str), "Invalid Prometheus query"
else:
assert isinstance(
alert_config['query'], dict), "Kairosdb Query string cannot be validated as proper JSON"
defined_tags = set(alert_config['query']['metrics'][0]['tags'].keys()).union(
{'', 'dc', 'fqdn'})
# IF THERE IS AGGREGATION WE HAVE TO ADD THESE TAGS
if 'group_by' in alert_config['query']['metrics'][0]:
defined_tags.update(
set(alert_config['query']['metrics'][0]['group_by'][0]['tags']))
# for undefined_tag in set(alert_config['tags']).difference(defined_tags):
# print("WARNING! {} tag is not defined on the query. Please make sure it does exist to "\
# "prevent empty results".format(undefined_tag))
# OUR MINIMUM THRESHOLD NEED
assert 'critical_lower_threshold' in alert_config or 'critical_upper_threshold' in alert_config or \
'warning_lower_threshold' in alert_config or 'warning_upper_threshold' in alert_config, \
"Config must have at least one threshold set."
# JUST MAKE SURE YOU ARE NOT DOING SOMETHING STUPID WITH WARNING COMING
# AFTER CRITICAL
if 'warning_lower_threshold' in alert_config and 'critical_lower_threshold' in alert_config:
assert alert_config['critical_lower_threshold'] < alert_config['warning_lower_threshold'], \
"Lower Critical must be less than Lower Warning"
if 'warning_upper_threshold' in alert_config and 'critical_upper_threshold' in alert_config:
assert alert_config['critical_upper_threshold'] > alert_config['warning_upper_threshold'], \
"Upper Critical must be greater than Upper Warning"
if 'lookup' in alert_config['alerts']:
assert 'default' in alert_config['alerts']['lookup'], 'No default alert configured for the lookup configuration'
assert 'lookup_file' in alert_config['alerts']['lookup'] or 'lookups' in alert_config['alerts'][
'lookup'], 'No lookup configured either in the alert configuration or in a separated file'
assert 'tags' in alert_config['alerts']['lookup'], 'No tags configured for the lookup configuration'
assert all(
isinstance(
tag, str) for tag in alert_config['alerts']['lookup']['tags']), 'Tags must be valid string'
# if 'occurrences_threshold' in alert_config:
# assert alert_config['occurrences_threshold'] >= 1, \
# "Having an occurrences value less than 2 is assumed and pointless to specify"
except Exception as e:
logger.warning("Invalid config file: {}".format(str(e)))
return False
return True
def is_valid_alert_routing_lookup(alert_routing_lookup, alert, logger):
"""Check if routing lookup is properly configured"""
try:
assert alert_routing_lookup, "No lookup values configured, the configuration is empty."
for alert_routing in alert_routing_lookup:
assert 'alert' in alert_routing, "No alert defined for this configuration."
assert 'tags' in alert_routing, "No tags value defined for this configuration."
for tag in alert_routing['tags']:
assert tag in alert['alerts']['lookup']['tags'], "The tag {} is not part of the configuration".format(
tag)
assert all(isinstance(tag, str)
for tag in alert_routing['tags']), "Tags must be valid string"
except AssertionError as e:
logger.warning("Invalid alert routing config file: {}".format(str(e)))
return False
return True
# noinspection PyBroadException
def glob_the_configs(
config_path,
lookup_config_path,
consul_url,
hostname,
logger):
"""
Args:
config_path (string): relative path to the alert configs
lookup_config_path (string): relative path to the alert routing lookup configs
consul_url (string): url to the consul service
hostname (string): hostname of this node, used to shard configs across nodes
logger: logger instance
Returns:
List of valid alert configs assigned to this node
"""
invalid_configs = 0
alert_list = []
host_index, module = get_healthy_nodes_and_index(
consul_url, hostname, logger)
for config_file in glob.glob(config_path + "/**/*.yaml", recursive=True):
logger.debug("Found {} config".format(config_file))
# LOAD CONFIG
if distribute_configs(
config_file,
host_index,
module,
logger):
try:
alert = yaml.safe_load(open(config_file, 'rb').read())
if is_valid(alert, logger):
if 'lookup' in alert['alerts']:
alert_routing_lookup = []
is_valid_lookup = True
if 'lookup_file' in alert['alerts']['lookup']:
lookup_path = "{}/{}".format(
lookup_config_path, alert['alerts']['lookup']['lookup_file'])
if os.path.isfile(lookup_path):
alert_routing_lookup = yaml.safe_load(
open(lookup_path, 'rb').read())
else:
is_valid_lookup = False
else:
alert_routing_lookup = alert['alerts']['lookup']['lookups']
is_valid_lookup = is_valid_lookup and is_valid_alert_routing_lookup(
alert_routing_lookup, alert, logger)
if is_valid_lookup:
alerts_per_tags = {}
for alert_configuration in alert_routing_lookup:
key = []
for tag in alert['alerts']['lookup']['tags']:
key.append(
alert_configuration['tags'].get(tag))
alerts_per_tags[tuple(
key)] = alert_configuration['alert']
alert['alert_routing_lookup'] = alerts_per_tags
else:
invalid_configs += 1
continue
alert_list.append(alert)
else:
invalid_configs += 1
except BaseException as e:
logger.error("Error parsing {} config: {}".format(config_file, e))
logger.info("Invalid configs: {}".format(invalid_configs))
service.send_stat(
'invalid_configs',
invalid_configs,
dict(),
statprefix='aom')
logger.info("Loaded {} configs".format(len(alert_list)))
return alert_list

10
AoM_Service/library/job.py Executable file
View File

@@ -0,0 +1,10 @@
import subprocess
class Job() :
def __init__(self, id, p):
self.id = id
self.p = p
def kill(self) :
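# The check processes are stopped hard with SIGKILL and then joined to reap the child.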
subprocess.call(["/bin/kill", "-9", "{}".format(self.p.pid)])
self.p.join()

29
AoM_Service/library/job_list.py Executable file
View File

@@ -0,0 +1,29 @@
from job import Job
class Job_List() :
def __init__(self) :
self.jobs = {}
def __getitem__(self, k) :
return self.jobs[k]
def __setitem__(self, k, v) :
self.jobs[k] = v
def __len__(self) :
return len(self.jobs)
def add(self, job) :
if isinstance(job, Job) :
self[job.id] = job
elif isinstance(job, Job_List) :
for j in job.jobs :
self.add(job[j])
else :
raise Exception("unexpected type added to Job_List")
def kill(self, id) :
if not id in self.jobs :
return
self[id].kill()
del(self.jobs[id])

122
AoM_Service/library/logger.py Executable file
View File

@@ -0,0 +1,122 @@
# logger.py
""" Logging configuration """
import logging
import logging.handlers
import os
logging.getLogger('requests').setLevel(logging.ERROR)
logging.getLogger('urllib3').setLevel(logging.ERROR)
logging.getLogger('werkzeug').setLevel(logging.ERROR)
class SingleLevelFilter(logging.Filter):
def __init__(self, passlevel, reject):
"""
Initializer (constructor) of the SingleLevelFilter
@param passlevel (int) - the int value of the level of the log
@param reject (bool) - if true will return if the record level is
not equal to the passlevel
@return SingleLevelFilter object
@note Sets some object parameters
"""
self.passlevel = passlevel
self.reject = reject
def filter(self, record):
"""
Returns True/False depending on parameters
@param record (Log int) - the record that the filter belongs to
@return bool - True/False depending on what self.reject is set to and
what record.levelno and self.passlevel are set to
@note This causes either only logging of the exact same level to get
logged, or only logging other than the same level to get logged
"""
if self.reject:
return record.levelno != self.passlevel
return record.levelno == self.passlevel
class AlertLogging(logging.Logger):
"""
Class Object to handle the logging of the alert on metrics service
starts at Error level and can flip on (and add) an additional log file and
Debug logger as needed.
"""
def __init__(self, name):
"""
Inits the formatters and logger
"""
self.name = name
self.log_handler = None
self.debug_handler = None
self.debug_formatter = logging.Formatter(
"%(asctime)s - [%(levelname)s] - [%(module)s:%(lineno)d] - "
"%(message)s", "%m-%d %H:%M:%S")
self.standard_formatter = logging.Formatter(
"%(asctime)s - [%(levelname)s] - %(message)s", "%m-%d %H:%M:%S")
logging.getLogger()
logging.Logger.__init__(self, name, logging.DEBUG)
logging.setLoggerClass(AlertLogging)
def start(self):
"""
Returns:
"""
info_handler = logging.StreamHandler()
info_handler.setLevel(logging.INFO)
info_handler.setFormatter(self.standard_formatter)
self.addHandler(info_handler)
return self
def start_log_file(self, file_path, mode='a'):
"""
Creates a separate log file handler
Args:
file_path: path to the log file
mode: the type of mode to open the file handler with
Returns:
"""
self.log_path = file_path
work_folder = os.path.dirname(file_path)
if work_folder and not os.path.exists(work_folder):
os.makedirs(work_folder)
self.log_handler = logging.FileHandler(file_path, mode)
self.log_handler.setLevel(logging.WARNING)
self.log_handler.setFormatter(self.debug_formatter)
self.addHandler(self.log_handler)
def stop_log_file(self):
"""
Closes Log file and sets the handler to None
Returns:
"""
self.log_handler.close()
self.removeHandler(self.log_handler)
self.log_handler = None
def start_debug(self):
"""
Returns:
"""
self.debug_handler = logging.StreamHandler()
self.debug_handler.setLevel(logging.DEBUG)
self.debug_handler.addFilter(SingleLevelFilter(logging.DEBUG, False))
self.debug_handler.setFormatter(self.debug_formatter)
self.addHandler(self.debug_handler)
def stop_debug(self):
"""
stop the debugger
Returns:
"""
self.removeHandler(self.debug_handler)
self.debug_handler = None
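# Typical usage, as in aom_service.py: attach the stdout handler first, then a
# file handler that captures warnings and above.
#
#     log = AlertLogging('aom')
#     log.start()
#     log.start_log_file("logs/aom_service.log")
#     log.info("Service starting")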

14
AoM_Service/library/process.py Executable file
View File

@@ -0,0 +1,14 @@
import multiprocessing
class Process(multiprocessing.Process) :
def __init__(self, alert_config, config, logger, production_mode) :
multiprocessing.Process.__init__(
self,
target=self.get_target(),
args=(alert_config, config, logger, production_mode),
name=alert_config.id,
daemon=True,
)
def get_target(self) :
raise Exception("abstract method not implemented")

View File

@@ -0,0 +1,14 @@
import process_prometheus
import process_kairos
class Process_Factory() :
def __init__(self, config, logger, production) :
self.config = config
self.logger = logger
self.production = production
def build(self, alert_config) :
if alert_config.type() == "prometheus" :
return process_prometheus.Process_Prometheus(alert_config, self.config, self.logger, self.production)
else:
return process_kairos.Process_Kairos(alert_config, self.config, self.logger, self.production)

View File

@@ -0,0 +1,6 @@
import process
from serviceapp import service
class Process_Kairos(process.Process) :
def get_target(self) :
return service.check_kairosdb_alert

View File

@@ -0,0 +1,6 @@
import process
from serviceapp import service
class Process_Prometheus(process.Process) :
def get_target(self) :
return service.check_prometheus_alert

80
AoM_Service/library/service.py Executable file
View File

@@ -0,0 +1,80 @@
import os
from alert_config_list import Alert_Config_List
from alert_config import Alert_Config
from job_list import Job_List
from job import Job
from process_factory import Process_Factory
from time import sleep
from config import glob_the_configs
from serviceapp import service
class Service() :
def __init__(self, logger, reload_interval, hostname, config):
self.alert_config_list = Alert_Config_List()
self.job_list = Job_List()
self.logger = logger
self.info = self.logger.info
self.error = self.logger.error
self.reload_interval = reload_interval
self.box_hostname = os.environ['HOSTNAME'] if hostname is None else hostname
        self.production = "TEST" not in os.environ
self.config = config
def start(self) :
self.info("Waiting 15s for Consul service to pass")
sleep(15)
while self.is_running() :
new_alert_config_list = self.get_new_alert_config_list()
self.purge_stale(new_alert_config_list)
self.create_upserted(new_alert_config_list)
self.alert_config_list = new_alert_config_list
total_jobs = len(self.job_list)
self.info("Total running jobs: {}".format(total_jobs))
service.send_stat('total_jobs', total_jobs, dict(), statprefix='aom')
sleep(self.reload_interval)
self.info("Exiting alerts")
self.purge_stale(Alert_Config_List())
def is_running(self) :
return True
def get_new_alert_config_list(self) :
try :
yaml_configs = self.parse_alert_config_files()
alert_configs = [Alert_Config(i) for i in yaml_configs]
return Alert_Config_List(alert_configs)
        except Exception as e :
            self.error("Failed to load config files: {}".format(e))
            return Alert_Config_List()
def parse_alert_config_files(self) :
path = self.config['alert_folder']
routing = self.config['alert_routing_config']
consul = 'http://consul.service.consul:8500'
return glob_the_configs(path, routing, consul, self.box_hostname, self.logger)
def purge_stale(self, new_alert_config_list) :
_, removed_ids, modified_ids = self.alert_config_list.compare(new_alert_config_list)
stale_ids = removed_ids.union(modified_ids)
for stale_id in stale_ids :
self.job_list.kill(stale_id)
service.send_stat('removed_jobs', len(removed_ids), dict(), statprefix='aom')
self.info("Removed alert_configs: {}".format(removed_ids))
def create_upserted(self, new_alert_config_list) :
added_ids, _, modified_ids = self.alert_config_list.compare(new_alert_config_list)
upserted_ids = added_ids.union(modified_ids)
for id in upserted_ids :
p = self.spawn_process(new_alert_config_list[id])
j = Job(id, p)
self.job_list.add(j)
service.send_stat('new_jobs', len(added_ids), dict(), statprefix='aom')
service.send_stat('modified_jobs', len(modified_ids), dict(), statprefix='aom')
self.info("Added alert_configs: {}".format(added_ids))
self.info("Modified alert_configs: {}".format(added_ids))
def spawn_process(self, alert_config) :
process_factory = Process_Factory(self.config, self.logger, self.production)
process = process_factory.build(alert_config)
process.start()
return process
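# Reconciliation sketch (not part of the original file, runnable on its own):
# the start() loop above is a three-way diff between the previous and the
# freshly parsed config set - removed or modified alerts get their jobs killed,
# added or modified alerts get new processes spawned. The plain-set version
# below mirrors that compare()/purge_stale()/create_upserted() cycle without
# Consul, the config globber, or real processes.
if __name__ == "__main__":
    previous = {"cpu_alert", "disk_alert", "latency_alert"}
    current = {"cpu_alert", "latency_alert", "error_rate_alert"}
    added = current - previous      # spawn workers for these
    removed = previous - current    # kill workers for these
    unchanged = previous & current  # left running (respawned only if modified)
    print("kill:", sorted(removed))
    print("spawn:", sorted(added))
    print("keep:", sorted(unchanged))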

View File

View File

@@ -0,0 +1,189 @@
from hashlib import md5
from thresholds import Thresholds
class Alert() :
    def __init__(self, alert_config, logger, tags, result, min_value, max_value, availability=False) :
self.occurrences_breached = False
self.new_level_breached = False
self.info = logger.info
self.debug = logger.debug
self.warning = logger.warning
self.error = logger.error
self.alert_config = alert_config
self.thresholds = Thresholds(alert_config)
self.tags = ""
self.result = result
self.set_tags(tags)
self.alert_config.init_for_tags(alert_config.get_tags())
self.set_firing(min_value, max_value)
        if availability :
            self.info("Sending availability stat")
            self.send_metrics(self.name(), 0 if self.level() == "CRITICAL" else 1, self.result, 'service_level')
def name(self) :
return "Metric: {} for {}".format(self.alert_config.id, self.get_tags())
def body(self) :
body = ""
if not self.get_firing() :
body = self.get_not_firing_body()
else :
body = self.get_is_firing_body()
self.debug("Alert {}->[{}]->{}, Occurrences={} of {}".format(
self.name(),
self.get_tags(),
self.level(),
self.get_occurrences(),
self.alert_config.occurrences(),
))
self.send_metrics(self.name(), self.level_code(), self.level())
# TODO
        return body, md5(self.get_tags().encode('utf-8')).hexdigest()[:10]
def level(self) :
if not self.get_firing() :
return "RECOVERY"
if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.CRITICAL)] :
return "CRITICAL"
if [t for t in self.thresholds.get_thresholds_matching(level=Thresholds.WARNING)] :
return "WARNING"
    def level_code(self) :
        # 0 = RECOVERY/OK, 1 = WARNING, 2 = CRITICAL
        level = self.level()
        if level == "RECOVERY" :
            return 0
        elif level == "WARNING" :
            return 1
        elif level == "CRITICAL" :
            return 2
    def get_not_firing_body(self) :
        body = ""
        body += self.get_not_firing_body_threshold()
        body += self.get_not_firing_body_occurrences()
        if not body :
            self.alert_config.set_for_tags(self.get_tags()+"_count", 0)
            return ""
        return "GOOD: " + body
def get_not_firing_body_threshold(self) :
if self.result is None :
return ""
body = ""
v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=True)
if not ok :
v, ok = self.alert_config.get_threshold(isUpper=True, isWarning=False)
if ok :
body += self.form("<", v)
v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=True)
if not ok :
v, ok = self.alert_config.get_threshold(isUpper=False, isWarning=False)
if ok :
body += self.form(">", v)
return body
def get_not_firing_body_occurrences(self) :
if not self.get_occurrences() :
return ""
body = ""
        if self.result is not None :
self.send_metrics(self.name(), 1, self.level())
else :
body += "{} RECOVERY due to no results found from query. Recommend you manually validate recovery\n{}".format(self.name(), self.alert_config.url())
self.set_occurrences(force=0)
return body
    def get_is_firing_body(self) :
        body = ""
        # Report every breached threshold together with the value that tripped it
        for threshold in self.thresholds.get_thresholds_matching(end=Thresholds.UPPER) :
            if threshold.get_breached() :
                body += self.form(">", threshold.get_threshold())
        for threshold in self.thresholds.get_thresholds_matching(end=Thresholds.LOWER) :
            if threshold.get_breached() :
                body += self.form("<", threshold.get_threshold())
        if not self.occurrences_breached :
            self.debug("Value {} of {} for tag {} has occurred {} time(s) < threshold of {}".format(
                self.value,
                self.name(),
                self.get_tags(),
                self.get_occurrences(),
                self.alert_config.occurrences(),
            ))
            return ""
        return body
def form(self, operator, static) :
return "{}\n{:.2f} {}= {}\n{}".format(
self.name(),
self.value,
operator,
static,
self.alert_config.url(),
)
def set_tags(self, tags) :
if tags :
self.tags = tags
        elif self.result :
            import itertools
            result_tags = itertools.chain(*[ self.result['tags'][x] for x in self.alert_config.get_tags() ])
            self.tags = ", ".join(sorted(result_tags))
if not self.tags :
self.tags = "instance"
def get_tags(self) :
return self.tags
    def set_firing(self, min_value, max_value) :
        self.thresholds = Thresholds(self.alert_config)
        self.thresholds.set_breached(min_value, max_value)
        # Mirror the legacy behaviour: alert on the max value unless a lower
        # threshold was the one breached
        self.value = min_value if self.thresholds.get_breached(end=Thresholds.LOWER) else max_value
        self.set_occurrences()
        self.set_new_level_breached()
        self.send_metrics()
        self.send_threshold_metrics()
def get_firing(self) :
return self.thresholds.get_breached() and self.occurrences_breached
def get_occurrences(self) :
tags = self.get_tags()
return self.alert_config.get_for_tags(tags)
def set_occurrences(self, force=None) :
previous_occurrences = self.get_occurrences()
if self.thresholds.get_breached() :
new_occurrences = previous_occurrences+1
self.alert_config.set_for_tags(self.get_tags(), new_occurrences)
self.occurrences_breached = self.alert_config.occurrences() <= new_occurrences
        if force is not None :
            self.alert_config.set_for_tags(self.get_tags(), force)
            self.alert_config.set_for_tags(self.get_tags()+"_count", force)
def send_metrics(self, *args, **kwargs) :
print("send_metrics not impl")
def set_new_level_breached(self) :
key = self.get_tags()
level = self.level()
previous_level = self.alert_config.get_level(key)
self.new_level_breached = level != previous_level
self.alert_config.set_level(key, level)
self.info("testInfo: {} {}".format(
"NEW" if self.new_level_breached else "EXISTING",
self.level(),
))
def get_new_level_breached(self) :
return self.new_level_breached
    def send_threshold_metrics(self) :
        # TODO
        self.send_metrics(self.alert_config.id, self.value)
        for level in [Thresholds.WARNING, Thresholds.CRITICAL] :
            for end in [Thresholds.UPPER, Thresholds.LOWER] :
                v, ok = self.alert_config.get_threshold(isUpper=end == Thresholds.UPPER, isWarning=level == Thresholds.WARNING)
                if ok :
                    key = "{}_{}_threshold".format(
                        "upper" if end == Thresholds.UPPER else "lower",
                        "warning" if level == Thresholds.WARNING else "critical",
                    )
                    self.send_metrics(key, v, {'id':self.name()})

View File

@@ -0,0 +1,13 @@
from alert import Alert
class Alert_Factory() :
def __init__(self, alert_config, logger) :
self.alert_config = alert_config
self.logger = logger
self.info = logger.info
self.warning = logger.warning
self.debug = logger.debug
self.error = logger.error
def build(self, minvalue, maxvalue, result, tags, availability, alert_tags) :
        return Alert(self.alert_config, self.logger, tags, result, minvalue, maxvalue, availability)

View File

@@ -0,0 +1,83 @@
from datetime import datetime, timedelta
from urllib.parse import urljoin
import requests
class PromAPI:
def __init__(self, endpoint='http://127.0.0.1:9090/'):
"""
:param endpoint: address of
"""
self.endpoint = endpoint
@staticmethod
def _to_timestamp(input_):
"""
Convert string input to UNIX timestamp for Prometheus
:param input_:
:return:
"""
if type(input_) == datetime:
return input_.timestamp()
if input_ == 'now':
return datetime.utcnow().isoformat('T')
if type(input_) is str:
input_ = float(input_)
if type(input_) in [int, float]:
if input_ > 0:
return input_
if input_ == 0: # return now
return datetime.utcnow().isoformat('T')
if input_ < 0:
return (datetime.utcnow() + timedelta(seconds=input_)).isoformat('T')
#assert type(input_) == float
def query(self, query='prometheus_build_info'):
return self._get(
uri='/api/v1/query',
params=dict(
query=query
)
)
def query_range(self, query='prometheus_build_info', start=-60, end='now', duration=60):
"""Get ser"""
params = {
'query': query
}
if end is not None:
params['end'] = self._to_timestamp(end) + 'Z'
if start:
params['start'] = self._to_timestamp(start) + 'Z'
if duration:
params['step'] = duration
print(params)
return self._get(
uri='/api/v1/query_range',
params=params
)
def series(self, match='prometheus_build_info', start=-86400, end='now'):
"""Get ser"""
params = {
'match[]': match
}
if end is not None:
params['end'] = self._to_timestamp(end) + 'Z'
if start:
params['start'] = self._to_timestamp(start) + 'Z'
print(params)
return self._get(
uri='/api/v1/series',
params=params
)
def _get(self, uri, params, method='GET'):
url = urljoin(self.endpoint, uri)
assert method == 'GET'
result = requests.get(
url=url,
params=params
)
return result.json()
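# Usage sketch (not part of the original module): assumes a Prometheus server
# is reachable at the example endpoint below. It asks for the last hour of
# prometheus_build_info at a 60 second step and prints how many series came
# back.
if __name__ == "__main__":
    api = PromAPI(endpoint='http://127.0.0.1:9090/')
    response = api.query_range(
        query='prometheus_build_info',
        start=-3600,    # one hour ago, relative to now
        end='now',
        duration=60)    # step, in seconds
    if response.get('status') == 'success':
        print(len(response['data']['result']), "series returned")
    else:
        print("query failed:", response.get('error'))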

View File

@@ -0,0 +1,949 @@
""" Alert On Metrics functions"""
import copy
import itertools
import json
import os
import random
import smtplib
from email.mime.text import MIMEText
from socket import gaierror
from time import sleep
from hashlib import md5
import requests
from statsd import StatsClient
from serviceapp.prom_api import PromAPI
alert_status = [
'RECOVERY',
'WARNING',
'WARNING',
'CRITICAL',
'CRITICAL',
'CRITICAL']
def build_alert_message(alert, minvalue, maxvalue, result, logger,
availability, tag=None, alert_tags=None):
"""
Build the alert message
Args:
alert: the alert object that includes a tag definition
minvalue: the min value to test against the threshold
maxvalue: the max value to test against the threshold
        result: the result object returned by the KairosDB or Prometheus query
        logger (log object): does the logging
        availability: whether to emit the service_level availability stat
        tag: If passed in will use this value for the tag instead of
            getting it from the result object
        alert_tags: the tags corresponding to the result, used if an
            alert has to be triggered and a custom routing per tag is configured
    Returns:
        An alert tuple (entity, body, status code, alert_tags, md5 suffix),
        or None if no alert needs to fire
    """
# DEFAULT TO MAX VALUE AS THE VALUE WE WILL ALERT ON. LOGIC BELOW
# MAY CHANGE THIS.
# value = maxvalue
# # HANDLE THE CASE WHERE SOMEONE HAS NOT SPECIFIED ANY TAGS IN THEIR QUERY
# # (USUALLY A GLOBAL ALL-DC QUERY)
# if tag is None and result is not None:
# tag = ', '.join(sorted(list(itertools.chain(
# *[result['tags'][x] for x in alert['tags']]))))
# tag_count = tag + "_count"
# WE WILL USE THIS ONE LATER FOR TRACKING OCCURRENCES OF KAIROSDB NOT
# RETURNING RESULTS
# tag_noresult = tag + "_noresult"
# if not tag:
# tag = 'instance'
# logger.debug("No tag specified for alert {}".format(alert['id']))
# INSTEAD OF TRYING TO HANDLE LOGIC WHERE THESE ARE NOT IN THE OBJECT, PUT
# THEM IN AS SOON AS THEY ARE CREATED SO THAT ON FIRST RUN AN ALERT HAS ALL
# THE ALERT['alert_tags'][TAG] AND ALERT['alert_tags'][TAG_COUNT] NEEDED
# if 'alert_tags' not in alert:
# alert['alert_tags'] = {}
# if tag not in alert['alert_tags']:
# alert['alert_tags'][tag] = 0
# if tag_count not in alert['alert_tags']:
# alert['alert_tags'][tag_count] = 0
# IF WE HIT THIS FUNCTION THEN WE ALWAYS SET (OR RESET) THIS NORESULT
# COUNTER TO 0 IE. IF WE ARE HERE IT IMPLIES WE HAVE A RESULT FROM
# KAIROSDB OR WE ARE AT THE END OF A LONG PERIOD OF NORESULTS WHERE WE ARE
# CLEARING EVERYTHING OUT ANYWAY
# alert['alert_tags'][tag_noresult] = 0
# # FIRST FIND OUT WHAT THRESHOLDS ARE SET AND HAVE BEEN BREACHED
# upper_critical_threshold = None
# upper_warning_threshold = None
# lower_warning_threshold = None
# lower_critical_threshold = None
# upper_threshold = None
# lower_threshold = None
# is_warning_alarm = False
# is_critical_alarm = False
# # UPPER
# upper_threshold_exists = False
# upper_warning_threshold_breached = False
# upper_critical_threshold_breached = False
# if 'warning_upper_threshold' in alert:
# upper_threshold_exists = True
# upper_warning_threshold = alert['warning_upper_threshold']
# upper_threshold = upper_warning_threshold
# if maxvalue >= upper_warning_threshold:
# upper_warning_threshold_breached = True
# is_warning_alarm = True
# if 'critical_upper_threshold' in alert:
# upper_critical_threshold = alert['critical_upper_threshold']
# if not upper_threshold_exists:
# upper_threshold = upper_critical_threshold
# upper_threshold_exists = True
# # IF CONFIG HAS A CRITICAL THRESHOLD SET AND WE PASS THAT THEN THAT IS
# # OUR THRESHOLD FOR ALERTING
# if maxvalue >= alert['critical_upper_threshold']:
# upper_threshold = upper_critical_threshold
# upper_critical_threshold_breached = True
# is_critical_alarm = True
# upper_threshold_breached = (upper_warning_threshold_breached
# or upper_critical_threshold_breached)
# # LOWER
# lower_threshold_exists = False
# lower_warning_threshold_breached = False
# lower_critical_threshold_breached = False
# if 'warning_lower_threshold' in alert:
# lower_threshold_exists = True
# lower_warning_threshold = alert['warning_lower_threshold']
# lower_threshold = lower_warning_threshold
# if minvalue <= lower_warning_threshold:
# lower_warning_threshold_breached = True
# is_warning_alarm = True
# if 'critical_lower_threshold' in alert:
# lower_critical_threshold = alert['critical_lower_threshold']
# if not lower_threshold_exists:
# lower_threshold = lower_critical_threshold
# lower_threshold_exists = True
# # IF CONFIG HAS A CRITICAL THRESHOLD SET AND WE PASS THAT THEN THAT AS
# # OUR THRESHOLD FOR ALERTING
# if minvalue <= lower_critical_threshold:
# lower_threshold = lower_critical_threshold
# lower_critical_threshold_breached = True
# is_critical_alarm = True
# lower_threshold_breached = (lower_warning_threshold_breached or
# lower_critical_threshold_breached)
# # THIS HAS TO MEAN THERE IS A PROBLEM WITH THE ALERT CONFIG
# if lower_threshold is None and upper_threshold is None:
# logger.debug(
# "ERROR: alert {} does not have any thresholds set on {}".format(
# alert['id'], tag))
# # ON TO OCCURRENCES
# if 'occurrences_threshold' in alert:
# occurrences_threshold = alert['occurrences_threshold']
# else:
# occurrences_threshold = 1
# alert_entity = "Metric: {} for {}".format(alert['id'], tag)
# if 'url' not in alert:
# alert['url'] = os.environ['AOM_GRAFANA_URL'] + str(alert['id'])
# ====================
# PREPARE ALERT BODY STRING AND SET THE VALUE WE WILL USE TO ALERT WITH
# ====================
# alert_body = ''
# if upper_threshold_breached:
# alert_body = "{}\n{:.2f} >= {}\n{}".format(
# alert_entity, value, upper_threshold, alert['url'])
# if lower_threshold_breached:
# value = minvalue
# alert_body = "{}\n{:.2f} <= {}\n{}".format(
# alert_entity, value, lower_threshold, alert['url'])
# SEND SOME STATS OUT AT THIS POINT AS WE KNOW WHERE WE ARE NOW. SEND THE
# THRESHOLDS TOO SO THEY CAN BE GRAPHED
### BREEL TODO ###
# if result is not None:
# send_metrics(alert, value, result)
# if 'critical_upper_threshold' in alert:
# send_stat('upper_critical_threshold', upper_critical_threshold,
# {'id': alert['id']})
# if 'warning_upper_threshold' in alert:
# send_stat('upper_warning_threshold', upper_warning_threshold,
# {'id': alert['id']})
# if 'critical_lower_threshold' in alert:
# send_stat('lower_critical_threshold', lower_critical_threshold,
# {'id': alert['id']})
# if 'warning_lower_threshold' in alert:
# send_stat('lower_warning_threshold', lower_warning_threshold,
# {'id': alert['id']})
# ====================
# APPLY OUR LOGIC TO MAKE SOME DECISIONS
# ====================
#current_alert_status = alert_status[0]
#if not lower_threshold_breached and not upper_threshold_breached:
# # if result is not None:
# # if lower_threshold_exists and not upper_threshold_exists:
# # alert_body = "{}\n{:.2f} > {}\n{}".format(
# # alert_entity, value, lower_threshold, alert['url'])
# # logger.debug("GOOD: alert {} is higher than lower threshold {}"
# # "for value {} on tag {}".format(
# # alert['id'], lower_threshold, value, tag))
# # if upper_threshold_exists and not lower_threshold_exists:
# # alert_body = "{}\n{:.2f} < {}\n{}".format(
# # alert_entity, value, upper_threshold, alert['url'])
# # logger.debug("GOOD: alert {} is below the upper threshold {} "
# # "for value {} on tag {}".format(
# # alert['id'], upper_threshold, value, tag))
# # if upper_threshold_exists and lower_threshold_exists:
# # alert_body = "{}\n{} < {:.2f} < {}\n{}".format(
# # alert_entity, lower_threshold, value, upper_threshold,
# # alert['url'])
# # logger.debug("GOOD: alert {} is between thresholds {} and {} "
# # "for value {} on tag {}".format(
# # alert['id'], upper_threshold, lower_threshold,
# # value, tag))
# # CHECK AND SEE IF TAG LOGIC IS SET, IE. WE WERE PREVIOUSLY IN ALARM
# # STATE
# #if alert['alert_tags'][tag] > 0:
# # if result is not None:
# # send_metrics(alert, 1, result, current_alert_status)
# # logger.info(
# # "TestInfo: RECOVERY: Clearing values for [{}] - {}".format(
# # alert['id'], tag))
# # if result is None:
# # alert_body = ("{} RECOVERY due to no results found from "
# # "KairosDB query. Recommend you manually validate"
# # "recovery.\n{}").format(
# # alert_entity, alert['url'])
# # alert['alert_tags'][tag] = 0
# # alert['alert_tags'][tag_count] = 0
# # if availability:
# # logger.info("Sending availability stat 1")
# # send_metrics(alert, 1, result, 'service_level')
# #else:
# # # WE RETURN NONE IF NO ALERT (EITHER RECOVERY OR WARNING OR
# # # CRITICAL) NEEDS TO BE FIRED
# # alert['alert_tags'][tag_count] = 0
# # if availability:
# # logger.info("Sending availability stat 1")
# # send_metrics(alert, 1, result, 'service_level')
# # return None
#else:
### BREEL WORKING HERE ###
# ====================
# SET KEY / VALUE FOR TAG ON ALERT
# 0 == No Alert
# 1 == Warning
# 2 == Existing Warning Alert
# 3 == New Critical
# 4+ == Existing Critical Alert
# ====================
# CHECK IF TAG_COUNT HAS BEEN SET, IF NOT SET IT, IF SO INCREMENT IT
# alert['alert_tags'][tag_count] += 1
# ALERT WONT FIRE UNLESS THE TAG_COUNT IS MORE THAN THE OCCURRENCES,
# THAT BEING EITHER 1 OR WHATEVER WAS SET ALERT HAS EXCEEDED
# OCCURRENCES SO RETURN IT
# TODO this doesnt belog in Alert.py
#if alert['alert_tags'][tag_count] >= occurrences_threshold:
# # >= 4 MEANS THIS IS A KNOWN CRITICAL, SO NO-OP
# if alert['alert_tags'][tag] < 4:
# if is_warning_alarm and not is_critical_alarm:
# # THIS HANDLES GOING STRAIGHT FROM NORMAL TO WARNING LEVEL
# if alert['alert_tags'][tag] == 0:
# # NEW WARNING
# alert['alert_tags'][tag] = 1
# logger.info("TestInfo: WARNING (NEW): {} - {}".format(
# alert['id'], tag))
# else:
# # EXISTING WARNING
# alert['alert_tags'][tag] = 2
# logger.info("TestInfo: WARNING (EXISTING): {} - {}".format(
# alert['id'], tag))
# if is_critical_alarm:
# # THIS HANDLES GOING FROM WARNING LEVEL TO CRITICAL LEVEL
# if (alert['alert_tags'][tag] == 1 or
# alert['alert_tags'][tag] == 2):
# alert['alert_tags'][tag] = 3
# logger.info("TestInfo: CRITICAL (WAS WARNING): {} - {}".format(
# alert['id'], tag))
# else:
# # THIS HANDLES GOING STRAIGHT FROM NORMAL TO CRITICAL
# # LEVEL
# if alert['alert_tags'][tag] < 3:
# # NEW CRITICAL
# alert['alert_tags'][tag] = 3
# logger.info("TestInfo: CRITICAL (NEW): {} - {}".format(
# alert['id'], tag))
# else:
# # EXISTING CRITICAL
# alert['alert_tags'][tag] = 4
# logger.info("TestInfo: CRITICAL (EXISTING): {} - {}".format(
# alert['id'], tag))
# RECORD THE FACT THAT SOMETHING IS STILL IN ALARM STATE IN METRICS
# EVEN IF NOT ACTIVELY ALERTING ON IT
# #if is_critical_alarm:
# #current_alert_status = alert_status[3]
# #send_metrics(alert, 2, result, current_alert_status)
# #if availability:
# # logger.info("Sending availability stat 0")
# # send_metrics(alert, 0, result, 'service_level')
# #if is_warning_alarm and not is_critical_alarm:
# #current_alert_status = alert_status[1]
# #send_metrics(alert, 1, result, current_alert_status)
# #if availability:
# # logger.info("Sending availability stat 1")
# # send_metrics(alert, 1, result, 'service_level')
# logger.debug("{} alert for value {} of {} for tag {} has occurred "
# "{} times. Threshold is >= {} times.".format(
# current_alert_status,
# value,
# alert['id'],
# tag,
# alert['alert_tags'][tag_count],
# occurrences_threshold))
# else:
# # WE RETURN NONE IF NO ALERT (EITHER RECOVERY OR WARNING OR
# # CRITICAL) NEEDS TO BE FIRED
# logger.debug("Value {} of {} for tag {} has occurred {} time(s) < "
# "threshold of {}".format(
# value,
# alert['id'],
# tag,
# alert['alert_tags'][tag_count],
# occurrences_threshold))
# if availability:
# logger.info("Sending availability stat")
# send_metrics(alert, 1, result, 'service_level')
# return None
#logger.debug(
# "Alert {}->[{}]->{}, Occurrences={}".format(
# alert['id'], tag, current_alert_status,
# alert['alert_tags'][tag_count]))
#return alert_entity, alert_body, alert['alert_tags'][tag], alert_tags, md5(tag.encode('utf-8')).hexdigest()[:10]
def check_kairosdb_alert(
alert_config,
service_config,
logger,
production_mode=True):
"""
    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, alerts are only logged, not sent
    Returns:
        None
    """
availability = False
# SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
# START AT THE SAME TIME
wait_time = random.randint(0, alert_config['interval'])
logger.info(
"ALERT_CONFIG: {}\tsleep: {}".format(
alert_config['id'],
wait_time))
sleep(wait_time)
    # For metrics with availability set to true, we default the interval to 5
    # mins due to Grafana limitations
if 'availability' in alert_config and alert_config['availability']:
availability = True
# ====================
# EACH CHECK JUST LOOPS
# ====================
ret = None
while True:
try:
send_stat("check_run", 1, {'id': alert_config['id']})
# BUILD URL FOR KAIROSDB METRICS AND QUERY FOR RESULTS
            query_url = (service_config['kairosdb_url'] +
                         "api/v1/datapoints/query")
ret = requests.post(
query_url,
data=json.dumps(
alert_config['query']),
timeout=service_config['timeout'])
assert ret.status_code == 200
# GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
results = ret.json()['queries'][0]['results']
logger.debug(
"Got back {} results for alert {}".format(
len(results), alert_config['id']))
log_alert_results(results, alert_config, logger)
alert_list = []
# LOOP THROUGH ALL THE RESULTS
for r in results:
alert_tags = (get_alert_tags(alert_config, r)
if has_custom_alert_routing(alert_config) else None)
# OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
# THEREIN AND EXAMINE FOR FAILURE
if r['values']:
minvalue = min([x[1] for x in r['values']])
maxvalue = max([x[1] for x in r['values']])
# SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
# AN OBJECT
alert_list.append(
build_alert_message(
alert_config,
minvalue,
maxvalue,
r,
logger,
availability,
alert_tags=alert_tags))
# THIS MEANS OUR KAIROS QUERY RETURNED NOTHING. COULD BE NETWORK
# ISSUES. WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
# AFTER X OCCURRENCES OF KAIROS NOT RETURNING DATA WE WILL CLEAR
# AOM'S BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A
# LATER OCCURRENCE CAUSING A PREMATURE ALERT.
# A NO-OP IF NO HISTORY.
elif 'alert_tags' in alert_config:
for key in alert_config['alert_tags']:
if ('count' not in key and 'noresult' not in key and
alert_config['alert_tags'][key] > 0):
key_noresult = key + "_noresult"
key_count = key + "_count"
if alert_config['alert_tags'][key_noresult] > 10:
logger.info("{} occurrences of no results back "
"for {}, clear out counts for tag '{}'".format(
alert_config['alert_tags'][key_noresult],
alert_config['id'], key))
alert_list.append(
build_alert_message(
alert_config,
0,
0,
None,
logger,
availability,
key,
alert_tags=alert_tags))
alert_config['alert_tags'][key] = 0
alert_config['alert_tags'][key_count] = 0
alert_config['alert_tags'][key_noresult] = 0
else:
alert_config['alert_tags'][key_noresult] += 1
logger.info("{} occurrences of no results back "
"for {}, tag '{}'".format(
alert_config['alert_tags'][key_noresult],
alert_config['id'], key))
# SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
for alert in [x for x in alert_list if x is not None]:
if production_mode:
send_alerts(
alert,
copy.deepcopy(alert_config),
service_config['victorops_url'],
service_config['slack_url'],
service_config['slack_token'],
service_config['smtp_server'],
service_config['sensu_endpoint'],
service_config['uchiwa_url'],
logger)
else:
logger.info(
"Sending alert for: {}".format(
alert_config.get('id')))
# HANDLE THE UNEXPECTED
except TimeoutError:
logger.error("Query [{}] took to long to run".format(
alert_config['id']))
except AssertionError:
logger.error(
"KairsoDB query failed: {}\n"
"HTTP status code:\t{}\n"
"Error Message:\t{}\nQuery:\n"
"{}".format(
ret.url,
ret.status_code,
ret.text,
alert_config['query']))
except gaierror:
logger.error(
"Unable to connect to smtp server: {}".format(
service_config['smtp_server']))
except Exception as e:
logger.error(
"Unhandled exception {} on alert: {}".format(
str(e), alert_config['id']))
finally:
sleep(alert_config['interval'])
def check_prometheus_alert(
alert_config,
service_config,
logger,
production_mode=True):
"""
    Args:
        alert_config (dict): Config of the alert to run
        service_config (dict): Holds things like urls, tokens and other things
        logger (log object): does the logging
        production_mode (bool): when False, alerts are only logged, not sent
    Returns:
        None
    """
# SLEEP A RANDOM TIME BETWEEN 0 AND INTERVAL SO THAT ALL ALERTS DON'T
# START AT THE SAME TIME
wait_time = random.randint(0, alert_config['interval'])
logger.info(
"ALERT_CONFIG: {}\tsleep: {}".format(
alert_config['id'],
wait_time))
sleep(wait_time)
# For metrics with availability set to true, we default the interval to 5
# mins due to Grafana limitations
availability = bool(alert_config.get('availability'))
# ====================
# EACH CHECK JUST LOOPS
# ====================
ret = None
while True:
try:
send_stat("check_run", 1, {'id': alert_config['id']})
prom_api = PromAPI(endpoint=alert_config['prometheus_url'])
ret = prom_api.query_range(
query=alert_config['query'],
start=alert_config['start_time'],
end=alert_config['end_time'],
duration=alert_config['interval'])
assert ret['status'] == 'success'
# GOT DATA BACK, NOW TO COMPARE IT TO THE THRESHOLD
results = ret['data']['result']
logger.debug(
"Got back {} results for alert {}".format(
len(results), alert_config['id']))
log_alert_results(results, alert_config, logger)
alert_list = []
# LOOP THROUGH ALL THE RESULTS
for r in results:
alert_tags = (get_alert_tags(alert_config, r) if
has_custom_alert_routing(alert_config) else None)
# REARRANGE RESULT TO MORE CLOSELY MATCH KAIROSDB RESULT
r['tags'] = {key: [value]
for (key, value) in r['metric'].items()}
# OUR QUERY RETURNED SOME VALUES - FIND MIN AND MAX VALUES
# THEREIN AND EXAMINE FOR FAILURE
if r['values']:
raw_values = [value for _, value in r['values']]
min_value = float(min(raw_values))
max_value = float(max(raw_values))
# SEND VALUES TO BUILD_ALERT_MESSAGE, WHICH RETURNS NONE OR
# AN OBJECT
alert_list.append(
build_alert_message(
alert_config,
min_value,
max_value,
r,
logger,
availability,
alert_tags=alert_tags))
# THIS MEANS OUR QUERY RETURNED NOTHING. COULD BE NETWORK ISSUES
# WE WILL TOLERATE THIS FOR X OCCURRENCES. (X=10)
# AFTER X OCCURRENCES OF NOT RETURNING DATA WE WILL CLEAR AOM'S
# BRAIN FOR THIS ALERT ID AND TAG COMBINATION TO AVOID A LATER
# OCCURRENCE CAUSING A PREMATURE ALERT. A NO-OP IF NO HISTORY.
elif 'alert_tags' in alert_config:
for key in alert_config['alert_tags']:
if ('count' not in key and 'noresult' not in key and
alert_config['alert_tags'][key] > 0):
key_noresult = key + "_noresult"
key_count = key + "_count"
if alert_config['alert_tags'][key_noresult] > 10:
logger.info("{} occurrences of no results back "
"for {}, clear out counts for tag '{}'".format(
alert_config['alert_tags'][key_noresult],
alert_config['id'], key))
alert_list.append(
build_alert_message(
alert_config,
0,
0,
None,
logger,
availability,
key,
alert_tags=alert_tags))
alert_config['alert_tags'][key] = 0
alert_config['alert_tags'][key_count] = 0
alert_config['alert_tags'][key_noresult] = 0
else:
alert_config['alert_tags'][key_noresult] += 1
logger.info("{} occurrences of no results back "
"for {}, tag '{}'".format(
alert_config['alert_tags'][key_noresult],
alert_config['id'], key))
# SEND ALL ALERTS FOUND TO THE ALERT HANDLERS THAT ARE NOT NONE
for alert in [x for x in alert_list if x is not None]:
if production_mode:
send_alerts(
alert,
copy.deepcopy(alert_config),
service_config['victorops_url'],
service_config['slack_url'],
service_config['slack_token'],
service_config['smtp_server'],
service_config['sensu_endpoint'],
service_config['uchiwa_url'],
logger)
else:
logger.info(
"Sending alert {}".format(
alert_config.get('id')))
# HANDLE THE UNEXPECTED
except TimeoutError:
            logger.error(
                "Query [{}] took too long to run".format(
                    alert_config['id']))
except AssertionError:
logger.error(
"Prometheus query failed:\n"
"Status:\t{}\n"
"Error Type:\t{}\n"
"Error Message:\t{}\n"
"Query:\n{}".format(
ret['status'],
ret['errorType'],
ret['error'],
alert_config['query']))
except gaierror:
logger.error(
"Unable to connect to smtp server: {}".format(
service_config['smtp_server']))
except Exception as e:
logger.error(
"Unhandled exception {} on alert: {}".format(
str(e), alert_config['id']))
finally:
sleep(alert_config['interval'])
# LOG ALERT RESULTS SO WE CAN DEBUG IF NEEDED
def log_alert_results(results, alert_config, logger):
"""
    Logs the results, broken out by the tags provided in the alert_config, to
    the logger for debugging
Args:
        results: the list of result objects returned from the KairosDB or
            Prometheus query
alert_config: config object of the alert
logger (log object): does the logging
Returns:
None, logs to logger
"""
for v in results:
logger.debug("{} - Result: {}".format(alert_config['id'], v))
def send_alerts(
alert,
alert_config,
victorops_url,
slack_url,
slack_token,
smtp_server,
sensu_endpoint,
uchiwa_url,
logger):
"""
Sends out the alerts to VO, Email, and/or Slack
Args:
        alert: the alert tuple:
            alert[0] == subject, alert[1] == body, alert[2] == status code,
            alert[3] == alert_tags, alert[4] == md5sum
alert_config: the alert configuration object
victorops_url: url to victorops
slack_url: url to slack api calls
        slack_token: the Slack API token used when posting alerts
        smtp_server: the server to send mail messages to
        sensu_endpoint: URL of the Sensu API endpoint that receives check results
        uchiwa_url: URL of the Uchiwa dashboard linked from alert bodies
logger (log object): does the logging
Returns: None
"""
# GOING TO USE THIS FOR TAGGING SOME METRICS ABOUT WHAT ALERT CHANNEL WAS
# USED
tag_dict = dict()
tag_dict['alert'] = alert_config['id']
is_custom_alert_routing = has_custom_alert_routing(alert_config)
if is_custom_alert_routing:
alert_routing = alert_config.get('alert_routing_lookup', {})
alert_config['alerts'] = alert_routing.get(
alert[3], alert_config['alerts']['lookup']['default'])
    # once we move all alerts into Sensu, we don't need to do this
if 'filters' in alert_config:
logger.info(
"alert_status : {}, alert_config: {}".format(
alert[2], alert_config))
if 'slack_subdue' in alert_config['filters'] and alert[2] in (
1, 2) and alert_config['filters']['slack_subdue']:
            # unless the alert is critical we don't send it
logger.info("Removed slack, alert_config: {}".format(alert_config))
alert_config['alerts'].pop('slack', None)
if ('victorops_subdue' in alert_config['filters'] and
alert[2] in (1, 2) and
alert_config['filters']['victorops_subdue']):
            # unless the alert is critical we don't send it
alert_config['alerts'].pop('vo', None)
logger.info("Removed vo, alert_config: {}".format(alert_config))
# ====================
# VICTOROPS HANDLING
# ====================
if 'vo' in alert_config['alerts']:
for notify in alert_config['alerts']['vo']:
payload = dict(entity_id=alert[0],
message_type=alert_status[alert[2]],
state_message=alert[1])
r = None
try:
r = requests.post(
victorops_url + notify,
data=json.dumps(payload),
                    headers={
                        "Content-type": "application/json"})
assert r.status_code == 200
# Record a VO alert sent event
tag_dict['alert_channel_type'] = "VictorOps"
tag_dict['who'] = "vo:{}".format(notify)
send_stat("alert_channel", 1, tag_dict)
# logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
except AssertionError:
logger.error(
"Post to VO failed for {}\n{}:\t{}".format(
alert_config['id'], r.status_code, r.text))
except Exception as e:
logger.error("Unhandled exception for alert_id:{} "
"when posting to VO: {}".format(
alert_config['id'], str(e)))
# ====================
# EMAIL HANDLING
# ====================
if 'email' in alert_config['alerts'] and (
alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
msg = MIMEText(alert[1])
msg['Subject'] = '{} Status: {}'.format(
alert[0], alert_status[alert[2]])
msg['From'] = 'aom@qualtrics.com'
msg['To'] = ','.join(
[x + "@qualtrics.com" for x in alert_config['alerts']['email']])
try:
s = smtplib.SMTP(smtp_server)
s.send_message(msg)
s.quit()
# Record an Email alert sent event
tag_dict['alert_channel_type'] = "Email"
tag_dict['who'] = "email:{}".format(msg['To'])
send_stat("alert_channel", 1, tag_dict)
# logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
except Exception as e:
logger.error(
"Unhandled exception when sending mail for {} to {}\n{}".format(
alert_config['id'], smtp_server, str(e)))
# ====================
# SENSU HANDLING
# ====================
if 'sensu' in alert_config['alerts']:
# Dictionary with static values for Sensu
sensu_dict = {
'source': 'AOM',
'refresh': 3600,
'occurrences': 1,
'name': alert_config['id']+'__'+alert[4]}
# if alert[3]:
# logger.info(alert)
# sensu_dict['name'] = '_'.join(
# [alert_config['id']] + sorted(list(alert[3])))
if 'refresh' in alert_config:
sensu_dict['refresh'] = alert_config['refresh']
sensu_dict['interval'] = alert_config['interval']
sensu_dict['handlers'] = []
sensu_dict['dashboard'] = alert_config['url']
if 'dependencies' in alert_config['alerts']['sensu'].keys():
sensu_dict['dependencies'] = (alert_config['alerts']
['sensu']['dependencies'])
if 'victorops' in alert_config['alerts']['sensu'].keys():
sensu_dict['handlers'].append("victorops")
sensu_dict['routing_key'] = (alert_config['alerts']
['sensu']['victorops'])
# # Leave this here until we have email support in Sensu
# if 'email' in alert_config['alerts']['sensu'].keys():
# sensu_dict['handlers'].append("email")
# # verify this option
# sensu_dict['email'] = alert_config['alerts']['sensu']['email']
if 'slack' in alert_config['alerts']['sensu'].keys():
sensu_dict['handlers'].append("slack")
sensu_dict['slack_channel'] = (
alert_config['alerts']['sensu']['slack'])
# Format alert message
sensu_dict['dashboard'] = (
"<{}|here> , Uchiwa: <{}?check={}|here> ".format(
alert_config['url'], uchiwa_url, alert_config['id']))
if 'jira' in alert_config['alerts']['sensu'].keys():
sensu_dict['handlers'].append("jira")
sensu_dict.update(alert_config['alerts']['sensu']['jira'])
if 'filters' in alert_config:
sensu_dict['filters'] = alert_config['filters']
# 0 = OK, 1 = WARNING, 2 = CRITICAL
sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
sensu_dict['status'] = sensu_status[alert[2]]
sensu_dict['output'] = alert[1]
r = None
try:
user = os.environ['API_USER']
passwd = os.environ['API_PASS']
r = requests.post(
sensu_endpoint,
json.dumps(sensu_dict),
auth=(
user,
passwd))
assert r.status_code == 202
except AssertionError:
logger.error(
"Post to Sensu failed {}\n{}:\t{}".format(
alert_config['id'],
r.status_code,
r.text))
except Exception as e:
logger.error("Unhandled exception for alert_id:{} "
"when posting to Sensu: {}".format(
alert_config['id'], str(e)))
# ====================
# SLACK HANDLING - all Slack alerts will go through Sensu
# ====================
if 'slack' in alert_config['alerts'] and (
alert[2] == 0 or alert[2] == 1 or alert[2] == 3):
refresh = alert_config.get('refresh', 3600)
dashboard = alert_config.get('url', '')
sensu_status = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}
sensu_dict2 = {'handlers': ['slack'],
'interval': alert_config['interval'],
'source': 'AOM',
'refresh': refresh,
'occurrences': 1,
'name': alert_config['id']+'__'+alert[4],
'dashboard': dashboard,
'status': sensu_status[alert[2]],
'output': alert[1]}
if is_custom_alert_routing:
sensu_dict2['name'] = '_'.join(
[alert_config['id']] + list(alert[3]))
sensu_dict2['dashboard'] = (
"<{}|here> , Uchiwa: <{}?check={}|here> ".format(
alert_config['url'], uchiwa_url, alert_config['id']))
for channel in alert_config['alerts']['slack']:
sensu_dict2['slack_channel'] = channel
r = None
try:
user = os.environ['API_USER']
passwd = os.environ['API_PASS']
r = requests.post(
sensu_endpoint,
json.dumps(sensu_dict2),
auth=(
user,
passwd))
assert r.status_code == 202
except AssertionError:
logger.error(
"Post to Sensu failed {}\n{}:\t{}".format(
alert_config['id'], r.status_code, r.text))
except Exception as e:
logger.error("Unhandled exception for alert_id:{} when posting"
"to Sensu: {}".format(alert_config['id'], str(e)))
# payload = dict(token=slack_token, channel=channel,
# text="{} Status: {}".format(alert[1], alert_status[alert[2]]))
# r = None
# try:
# r = requests.post(slack_url, data=payload)
# assert r.status_code == 200
# # Record an Slack alert sent event
# tag_dict['alert_channel_type'] = "Slack"
# tag_dict['who'] = "slack:{}".format(channel)
# send_stat("alert_channel", 1, tag_dict)
# # logger.info("TestInfo: {} alert for {}".format(alert_status(alert[2]), alert[0]))
# except AssertionError:
# logger.error("Post to Slack failed for {}\n{}:\t{}".format(alert_config['id'], r.status_code, r.text))
# except Exception as e:
# logger.error("Unhandled exception for alert_id:{} when posting to Slack: {}".format(alert_config['id'],
# str(e)))
def send_metrics(alert, value, result, gaugename='stats'):
"""
Sends the results from the alert check to statsd
Args:
        alert: the alert config object that holds the alert['tags'] list
gaugename: The name of the gauge metric we send.
value: The value we want to send as a gauge.
result: The result object from making the call. Use the data in this
object to tag the metric.
Returns: None
"""
# GROUP ALL THE ALERTS TOGETHER SO THAT PEEPS CAN FILTER OUT BY TAG THEIR
# SPECIFIC ALERTS
result_tags = list(itertools.chain(
*[result['tags'][x] for x in alert['tags']]))
tag_dict = dict()
for x in range(len(alert['tags'])):
tag_dict[alert['tags'][x]] = result_tags[x]
tag_dict['alert'] = alert['id']
# SEND THE METRIC
send_stat(gaugename, value, tag_dict)
def send_stat(gaugename, value, tag_dict, statprefix='aom'):
"""Sends stats value to statsd"""
client = StatsClient('telegraf', 8125, statprefix)
# SUBMIT STATS
client.gauge(gaugename, value, tags=tag_dict)
def has_custom_alert_routing(alert_config):
"""Checks if alert has custom routing"""
return 'lookup' in alert_config['alerts']
def get_alert_tags(alert_config, query_result):
"""Retrieves custom tags from alert"""
query_tags = []
for tag in alert_config['alerts']['lookup']['tags']:
if (alert_config.get('query_type') == 'prometheus' and
'metric' in query_result and
tag in query_result['metric']):
query_tags.append(query_result['metric'][tag])
elif ('tags' in query_result and tag in query_result['tags']
and query_result['tags'][tag]):
query_tags.append(query_result['tags'][tag][0])
return tuple(query_tags)
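# Routing sketch (not part of the original module, no network calls): a
# hypothetical alert_config whose 'alerts' section uses the 'lookup' form, plus
# one Prometheus-style and one KairosDB-style fake result, showing what
# has_custom_alert_routing() and get_alert_tags() return for each.
if __name__ == "__main__":
    example_config = {
        'id': 'example_latency_alert',
        'query_type': 'prometheus',
        'alerts': {
            'lookup': {
                'tags': ['datacenter'],
                'default': {'slack': ['#example-alerts']},
            },
        },
    }
    prom_result = {'metric': {'datacenter': 'dc1'}, 'values': []}
    kairos_result = {'tags': {'datacenter': ['dc2']}, 'values': []}
    print(has_custom_alert_routing(example_config))      # True
    print(get_alert_tags(example_config, prom_result))   # ('dc1',)
    example_config['query_type'] = 'kairosdb'
    print(get_alert_tags(example_config, kairos_result)) # ('dc2',)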

View File

@@ -0,0 +1,123 @@
import unittest
class Mock_Alert_Config() :
def __init__(self) :
self.cache = {}
self.level = {}
self.id = "id"
def set_level(self, k, v) :
self.level[k] = v
def get_level(self, k) :
if not k in self.level :
return None
return self.level[k]
def init_for_tags(self, *args) :
pass
def occurrences(self) :
return 1
    def get_threshold(self, isUpper, isWarning) :
        if isWarning :
            return None, False
        if isUpper :
            return 10, True
        else :
            return 0, True
def get_tags(self) :
return "tagsC, tagsD".split(", ")
def set_for_tags(self, key, value) :
if not key in self.cache :
self.cache[key] = 0
self.cache[key] = value
def get_for_tags(self, key) :
if not key in self.cache :
self.cache[key] = 0
return self.cache[key]
class Mock_Result() :
def __init__(self) :
pass
def __getitem__(self, key) :
if key == "tags" :
return self
else :
return key
class Mock_Logger() :
def __init__(self) :
for k in ["error", "warn", "debug", "info", "warning"] :
setattr(self, k, self.log)
def log(self, *args) :
pass
class Test_Alert(unittest.TestCase) :
def test_set_tags(self) :
import alert
ac = Mock_Alert_Config()
res = Mock_Result()
al = alert.Alert(ac, Mock_Logger(), None, None, -1, 11)
self.assertEqual(al.get_tags(), "instance")
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
self.assertEqual(al.get_tags(), "tagsA, tagsB")
al.set_tags("a, b, c", res)
self.assertEqual(al.get_tags(), "a, b, c")
al.set_tags("a, b, c", res)
self.assertEqual(al.get_tags(), "a, b, c")
def test_firing(self) :
import alert
ac = Mock_Alert_Config()
res = Mock_Result()
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 11)
self.assertTrue(al.get_firing())
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 11)
self.assertTrue(al.get_firing())
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, -1, 9)
self.assertTrue(al.get_firing())
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 1, 9)
self.assertFalse(al.get_firing())
def test_str(self) :
import alert
ac = Mock_Alert_Config()
res = Mock_Result()
alert = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
self.assertEqual(alert.name(), "Metric: id for tagsA, tagsB")
self.assertEqual(alert.body(), "")
def test_occurrences(self) :
import alert
ac = Mock_Alert_Config()
res = Mock_Result()
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 10)
self.assertEqual(False, al.occurrences_breached)
al.set_occurrences()
al.set_occurrences()
al.set_occurrences()
self.assertEqual(False, al.occurrences_breached)
self.assertEqual(0, ac.get_for_tags(al.get_tags()))
al = alert.Alert(ac, Mock_Logger(), "tagsA, tagsB", res, 0, 11)
self.assertEqual(True, al.occurrences_breached)
al.set_occurrences()
al.set_occurrences()
al.set_occurrences()
self.assertEqual(True, al.occurrences_breached)
self.assertEqual(4, ac.get_for_tags(al.get_tags()))
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,33 @@
import unittest
import alert_factory
class Mock_Alert() :
def __init__(self, *args) :
self.args = args
class Mock_Logger() :
def __init__(self) :
self.info = self.log
self.warn = self.log
self.warning = self.log
self.error = self.log
self.debug = self.log
def log(self, *args, **kwargs) :
print(args, kwargs)
class Test_Alert_Factory(unittest.TestCase) :
def setUp(self) :
self.was = alert_factory.Alert
alert_factory.Alert = Mock_Alert
def tearDown(self) :
alert_factory.Alert = self.was
def test(self) :
af = alert_factory.Alert_Factory(None, Mock_Logger())
alert = af.build(0, 5, None, "tagA, tagB", False, "tagC, tagD")
self.assertTrue(type(alert) == Mock_Alert)
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,8 @@
import unittest
class Test_Service(unittest.TestCase) :
def test(self) :
raise Exception("not impl")
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold(unittest.TestCase) :
def test(self) :
import threshold
tl = threshold.Threshold(5)
self.assertFalse(tl.can_breach())
self.assertFalse(tl.exceeds(7))
self.assertFalse(tl.exceeds(3))
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Lower(unittest.TestCase) :
def test(self) :
import threshold_lower
tl = threshold_lower.Threshold_Lower(5)
        self.assertTrue(tl.can_breach())
self.assertTrue(tl.exceeds(3))
self.assertFalse(tl.exceeds(7))
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Threshold_Upper(unittest.TestCase) :
def test(self) :
import threshold_upper
tl = threshold_upper.Threshold_Upper(5)
        self.assertTrue(tl.can_breach())
self.assertTrue(tl.exceeds(7))
self.assertFalse(tl.exceeds(3))
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,157 @@
import unittest
class Mock_Alert_Config() :
def __init__(self) :
self.upCrit = 10
self.lowCrit = 1
def get_threshold(self, upper, warn) :
if upper and warn :
return None, False
elif upper and not warn :
return self.upCrit, True
elif not upper and warn :
return None, False
else:
return self.lowCrit, True
class Test_Thresholds(unittest.TestCase) :
def test_breached_both(self) :
import thresholds
alert_config = Mock_Alert_Config()
t = thresholds.Thresholds(alert_config)
t.set_breached(alert_config.lowCrit-1, alert_config.upCrit+1)
should_fire = [
t.critical_breached(),
t.lower_breached(),
t.upper_breached(),
t.level_breached(t.CRITICAL),
t.end_breached(t.LOWER),
t.end_breached(t.UPPER),
t.get_breached(),
t.get_breached(level=t.CRITICAL),
t.get_breached(end=t.LOWER),
t.get_breached(end=t.UPPER),
]
for i in range(len(should_fire)) :
self.assertTrue(should_fire[i], i)
should_not_fire = [
t.warning_breached(),
t.level_breached(t.WARNING),
t.get_breached(level=t.WARNING),
]
for i in range(len(should_not_fire)) :
self.assertFalse(should_not_fire[i], i)
def test_breached_lower(self) :
import thresholds
alert_config = Mock_Alert_Config()
t = thresholds.Thresholds(alert_config)
t.set_breached(alert_config.lowCrit-1, alert_config.upCrit)
should_fire = [
t.critical_breached(),
t.lower_breached(),
t.level_breached(t.CRITICAL),
t.end_breached(t.LOWER),
t.get_breached(),
t.get_breached(level=t.CRITICAL),
t.get_breached(end=t.LOWER),
]
for i in range(len(should_fire)) :
self.assertTrue(should_fire[i], i)
should_not_fire = [
t.warning_breached(),
t.upper_breached(),
t.level_breached(t.WARNING),
t.end_breached(t.UPPER),
t.get_breached(level=t.WARNING),
t.get_breached(end=t.UPPER),
]
for i in range(len(should_not_fire)) :
self.assertFalse(should_not_fire[i], i)
def test_breached_upper(self) :
import thresholds
alert_config = Mock_Alert_Config()
t = thresholds.Thresholds(alert_config)
t.set_breached(alert_config.lowCrit, alert_config.upCrit+1)
should_fire = [
t.critical_breached(),
t.upper_breached(),
t.level_breached(t.CRITICAL),
t.end_breached(t.UPPER),
t.get_breached(),
t.get_breached(level=t.CRITICAL),
t.get_breached(end=t.UPPER),
]
for i in range(len(should_fire)) :
self.assertTrue(should_fire[i], i)
for i in [
t.warning_breached(),
t.lower_breached(),
t.level_breached(t.WARNING),
t.end_breached(t.LOWER),
t.get_breached(level=t.WARNING),
t.get_breached(end=t.LOWER),
] :
self.assertFalse(i)
def test_breached_notset(self) :
import thresholds
alert_config = Mock_Alert_Config()
t = thresholds.Thresholds(alert_config)
for i in [
t.warning_breached(),
t.critical_breached(),
t.upper_breached(),
t.lower_breached(),
t.level_breached(t.CRITICAL),
t.level_breached(t.WARNING),
t.end_breached(t.UPPER),
t.end_breached(t.LOWER),
t.get_breached(),
t.get_breached(level=t.CRITICAL),
t.get_breached(level=t.WARNING),
t.get_breached(end=t.UPPER),
t.get_breached(end=t.LOWER),
] :
self.assertFalse(i)
def test_get_matching(self) :
import thresholds
alert_config = Mock_Alert_Config()
t = thresholds.Thresholds(alert_config)
self.assertEqual(4, len([i for i in t.get_thresholds_matching()]))
self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.CRITICAL)]))
self.assertEqual(2, len([i for i in t.get_thresholds_matching(level=t.WARNING)]))
self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.UPPER)]))
self.assertEqual(2, len([i for i in t.get_thresholds_matching(end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.CRITICAL, end=t.UPPER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.LOWER)]))
        self.assertEqual(1, len([i for i in t.get_thresholds_matching(level=t.WARNING, end=t.UPPER)]))
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,19 @@
class Threshold() :
def __init__(self, threshold) :
self.threshold = threshold
self.breached = False
def can_breach(self) :
return False
def set_breached(self, value) :
self.breached = self.exceeds(value)
def get_breached(self) :
return self.breached
def exceeds(self, value) :
return False
def get_threshold(self) :
return self.threshold

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Lower(Threshold) :
def exceeds(self, value) :
return self.threshold > value
def can_breach(self) :
return True

View File

@@ -0,0 +1,8 @@
from threshold import Threshold
class Threshold_Upper(Threshold) :
def exceeds(self, value) :
return self.threshold < value
def can_breach(self) :
return True

View File

@@ -0,0 +1,67 @@
from threshold_upper import Threshold_Upper
from threshold_lower import Threshold_Lower
from threshold import Threshold
class Thresholds() :
WARNING = True
CRITICAL = False
UPPER = True
LOWER = False
def __init__(self, alert_config) :
self.alert_config = alert_config
self.thresholds = {}
for level in [ Thresholds.WARNING, Thresholds.CRITICAL ] :
self.thresholds[level] = {}
for end in [ Thresholds.UPPER, Thresholds.LOWER ] :
constructor = Threshold_Upper
if end == Thresholds.LOWER :
constructor = Threshold_Lower
self.thresholds[level][end] = self.create_threshold(end, level, constructor)
def create_threshold(self, isUpper, isWarning, constructor) :
value, has = self.alert_config.get_threshold(isUpper, isWarning)
if not has :
constructor = Threshold
return constructor(value)
def warning_breached(self) :
return self.level_breached(Thresholds.WARNING)
def critical_breached(self) :
return self.level_breached(Thresholds.CRITICAL)
def upper_breached(self) :
return self.end_breached(Thresholds.UPPER)
def lower_breached(self) :
return self.end_breached(Thresholds.LOWER)
def level_breached(self, level) :
return self.get_breached(level=level)
def end_breached(self, end) :
return self.get_breached(end=end)
    def can_breach(self) :
        can_breach = [t for t in self.get_thresholds_matching() if not type(t) is Threshold]
        return len(can_breach) > 0
def get_breached(self, level=None, end=None) :
for threshold in self.get_thresholds_matching(level=level, end=end) :
if threshold.get_breached() :
return True
return False
def set_breached(self, min_value, max_value) :
for threshold in self.get_thresholds_matching(end=Thresholds.LOWER) :
threshold.set_breached(min_value)
for threshold in self.get_thresholds_matching(end=Thresholds.UPPER) :
threshold.set_breached(max_value)
def get_thresholds_matching(self, level=None, end=None) :
for l in self.thresholds :
if level is None or l == level :
for e in self.thresholds[l] :
if end is None or e == end :
yield self.thresholds[l][e]
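# Evaluation sketch (not part of the original file): _Stub_Config is a
# hypothetical config exposing the get_threshold(isUpper, isWarning) contract
# used above - warning band 2..8 and critical band 0..10. Feeding a min/max
# pair into set_breached() then lets us ask which level/end combinations fired.
if __name__ == "__main__":
    class _Stub_Config() :
        def get_threshold(self, isUpper, isWarning) :
            table = {
                (True, True): 8,    # upper warning
                (True, False): 10,  # upper critical
                (False, True): 2,   # lower warning
                (False, False): 0,  # lower critical
            }
            return table[(isUpper, isWarning)], True
    t = Thresholds(_Stub_Config())
    t.set_breached(min_value=1, max_value=9)
    print(t.warning_breached())   # True: 9 > 8 and 1 < 2
    print(t.critical_breached())  # False: still inside the 0..10 band
    print(t.upper_breached(), t.lower_breached())  # True True (warning ends)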

View File

@@ -0,0 +1,14 @@
import unittest
class Test_Alert_Config(unittest.TestCase):
def test(self) :
from alert_config import Alert_Config
try :
Alert_Config(None)
self.fail("did not fail on nil yaml_config")
except Exception :
pass
self.assertEqual("a", Alert_Config({"id":"a"}).id)
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,54 @@
import unittest
import alert_config_list
class Mock_Alert_Config() :
def __init__(self, id) :
self.id = id
class Test_Alert_Config_List(unittest.TestCase):
def setUp(self) :
self.was = alert_config_list.Alert_Config
alert_config_list.Alert_Config = Mock_Alert_Config
self.al = alert_config_list.Alert_Config_List()
def tearDown(self) :
alert_config_list.Alert_Config = self.was
self.al = None
def test_add(self) :
self.al.add(Mock_Alert_Config("a"))
self.assertEqual(len(self.al), 1)
self.al.add([Mock_Alert_Config("a")])
self.assertEqual(len(self.al), 1)
self.al.add([Mock_Alert_Config("b")])
self.assertEqual(len(self.al), 2)
self.al.add(Mock_Alert_Config("c"))
self.assertEqual(len(self.al), 3)
other = alert_config_list.Alert_Config_List()
other.add(Mock_Alert_Config("d"))
self.al.add(other)
self.assertEqual(len(self.al), 4)
def test_compare(self) :
self.al.add(Mock_Alert_Config("a"))
self.al.add(Mock_Alert_Config("b"))
self.al.add(Mock_Alert_Config("c"))
new = alert_config_list.Alert_Config_List()
new.add(Mock_Alert_Config("a"))
new.add(Mock_Alert_Config("y"))
new.add(Mock_Alert_Config("z"))
added, removed, modified = self.al.compare(new)
if not "y" in added or not "z" in added :
self.fail("added is missing elements")
if not "b" in removed or not "c" in removed :
self.fail("removed is missing elements")
if not "a" in modified :
self.fail("modified is missing elements")
if __name__ == "__main__" :
unittest.main()

34
AoM_Service/library/test_job.py Executable file
View File

@@ -0,0 +1,34 @@
import unittest
import job
class Mock_Subprocess() :
called = False
joined = False
pid = None
def __init__(self) :
pass
def call(self, *args, **kwargs) :
self.called = True
def join(self, *args, **kwargs) :
self.joined = True
class Test_Job(unittest.TestCase):
def setUp(self) :
self.was = job.subprocess
self.subprocess = Mock_Subprocess()
job.subprocess = self.subprocess
def tearDown(self) :
job.subprocess = self.was
def test(self) :
p = Mock_Subprocess()
j = job.Job("id", p)
j.kill()
self.assertEqual(p.joined, True)
self.assertEqual(self.subprocess.called, True)
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,50 @@
import unittest
import job_list
class Mock_Job() :
def __init__(self, id, p) :
self.id = id
def kill(self) :
return
class Test_Job_List(unittest.TestCase):
def setUp(self) :
self.was = job_list.Job
job_list.Job = Mock_Job
def tearDown(self) :
job_list.Job = self.was
def test_add(self) :
jl = job_list.Job_List()
self.assertEqual(len(jl), 0)
try :
jl.add(None)
self.fail("can add nil to job_list")
except Exception :
pass
jl.add(Mock_Job("a", "a"))
self.assertEqual(len(jl), 1)
jl.add(Mock_Job("a", "a"))
self.assertEqual(len(jl), 1)
jl.add(Mock_Job("b", "b"))
self.assertEqual(len(jl), 2)
other = job_list.Job_List()
other.add(Mock_Job("b", "b"))
other.add(Mock_Job("c", "c"))
jl.add(other)
self.assertEqual(len(jl), 3)
jl.kill("a")
self.assertEqual(len(jl), 2)
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,31 @@
import unittest
import process
class Mock_Multiprocessing():
def __init__(self, *args, **kwargs) :
self.args = args
self.kwargs = kwargs
def get_target(self) :
return None
class Mock_Alert_Config() :
def __init__(self, id) :
self.id = id
class Test_Process(unittest.TestCase):
def setUp(self) :
self.was = process.multiprocessing.Process
process.multiprocessing.Process = Mock_Multiprocessing
def tearDown(self) :
process.multiprocessing.Process = self.was
def test(self) :
class MockProcess(process.Process) :
def get_target(self) :
return None
p = MockProcess(Mock_Alert_Config("a"), {}, None, True)
if __name__ == "__main__" :
unittest.main()

View File

@@ -0,0 +1,36 @@
import unittest

import process_factory


class Mock_Process_Prometheus() :
    def __init__(self, *args, **kwargs) :
        pass


class Mock_Process_Kairos() :
    def __init__(self, *args, **kwargs) :
        pass


class Mock_Alert_Config() :
    def __init__(self, type) :
        self.t = type
    def type(self) :
        return self.t


class Test_Process_Factory(unittest.TestCase):
    def setUp(self) :
        self.was_prom = process_factory.process_prometheus.Process_Prometheus
        self.was_kai = process_factory.process_kairos.Process_Kairos
        process_factory.process_prometheus.Process_Prometheus = Mock_Process_Prometheus
        process_factory.process_kairos.Process_Kairos = Mock_Process_Kairos
    def tearDown(self) :
        process_factory.process_prometheus.Process_Prometheus = self.was_prom
        process_factory.process_kairos.Process_Kairos = self.was_kai
    def test(self) :
        factory = process_factory.Process_Factory(None, None, None)
        self.assertTrue(type(factory.build(Mock_Alert_Config("a"))) is Mock_Process_Kairos)
        self.assertTrue(type(factory.build(Mock_Alert_Config("prometheus"))) is Mock_Process_Prometheus)


if __name__ == "__main__" :
    unittest.main()
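
The only rule this test pins down is the dispatch: build() returns a Process_Prometheus when the config's type() is "prometheus" and falls back to Process_Kairos for any other value. A sketch of that dispatch, with the meaning of the three constructor arguments assumed since the real factory is not shown:

import process_kairos
import process_prometheus


class Process_Factory_Sketch:
    def __init__(self, settings, logger, stats):
        self.settings = settings
        self.logger = logger
        self.stats = stats

    def build(self, alert_config):
        # anything that is not explicitly prometheus gets a KairosDB check process
        if alert_config.type() == "prometheus":
            return process_prometheus.Process_Prometheus(
                alert_config, self.settings, self.logger, self.stats)
        return process_kairos.Process_Kairos(
            alert_config, self.settings, self.logger, self.stats)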


@@ -0,0 +1,15 @@
import unittest


class Mock_Alert_Config() :
    def __init__(self, id) :
        self.id = id


class Test_Process_Kairos(unittest.TestCase):
    def test(self) :
        import process_kairos
        from serviceapp import service
        p = process_kairos.Process_Kairos(Mock_Alert_Config("a"), None, None, None)
        self.assertEqual(p.get_target(), service.check_kairosdb_alert)


if __name__ == "__main__" :
    unittest.main()


@@ -0,0 +1,15 @@
import unittest


class Mock_Alert_Config() :
    def __init__(self, id) :
        self.id = id


class Test_Process_Prometheus(unittest.TestCase):
    def test(self) :
        import process_prometheus
        from serviceapp import service
        p = process_prometheus.Process_Prometheus(Mock_Alert_Config("a"), None, None, None)
        self.assertEqual(p.get_target(), service.check_prometheus_alert)


if __name__ == "__main__" :
    unittest.main()


@@ -0,0 +1,100 @@
import unittest

from serviceapp import service as serviceapp
import time

import config
import service


class Mock_ServiceApp_Service() :
    def __init__(self, *args, **kwargs) :
        self.args = args
        self.kwargs = kwargs
    def send_stat(self, *args, **kwargs) :
        return


class Mock_Logger() :
    def __init__(self) :
        self.lines = []
        self.info = self.log
        self.warn = self.log
        self.warning = self.log
        self.debug = self.log
        self.error = self.log
    def log(self, *args, **kwargs) :
        self.lines.append("{}, {}".format(args, kwargs))
        print(self.lines[-1])


def Mock_Sleep(t) :
    return


def Mock_Get_Healthy(*args, **kwargs) :
    return 0, 1


def Mock_Distribute_Configs(*args, **kwargs) :
    return True


def Mock_Is_Valid(*args, **kwargs) :
    return True


def ignore_warnings(test_func):
    import warnings
    def do_test(self, *args, **kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            test_func(self, *args, **kwargs)
    return do_test


class Test_Service(unittest.TestCase) :
    def setUp(self) :
        self.mock_serviceapp_service = Mock_ServiceApp_Service
        self.was_k = serviceapp.check_kairosdb_alert
        self.was_p = serviceapp.check_prometheus_alert
        self.was_service = service.service
        self.was_sleep = time.sleep
        self.was_get_healthy = config.get_healthy_nodes_and_index
        self.was_distribute = config.distribute_configs
        self.was_is_valid = config.is_valid
        serviceapp.check_kairosdb_alert = self.mock_serviceapp_service
        serviceapp.check_prometheus_alert = self.mock_serviceapp_service
        config.get_healthy_nodes_and_index = Mock_Get_Healthy
        config.distribute_configs = Mock_Distribute_Configs
        config.is_valid = Mock_Is_Valid
        serviceapp.sleep = Mock_Sleep
        service.sleep = Mock_Sleep
        time.sleep = Mock_Sleep
    def tearDown(self) :
        # restore the module references saved in setUp
        service.service = self.was_service
        serviceapp.check_kairosdb_alert = self.was_k
        serviceapp.check_prometheus_alert = self.was_p
        config.get_healthy_nodes_and_index = self.was_get_healthy
        config.distribute_configs = self.was_distribute
        config.is_valid = self.was_is_valid
        time.sleep = self.was_sleep
        serviceapp.sleep = self.was_sleep
        service.sleep = self.was_sleep
    @ignore_warnings
    def test(self) :
        import service
        logger = Mock_Logger()
        s = service.Service(logger, 100, "HOST", {
            "alert_folder": "./testdata",
            "alert_routing_config": {},
        })
        global first
        first = True
        def f() :
            global first
            is_first = first
            first = False
            return is_first
        def purge_stale(*args) :
            return
        # run exactly one loop iteration, skipping the stale-alert purge
        s.is_running = f
        s.purge_stale = purge_stale
        s.start()


if __name__ == "__main__" :
    unittest.main()

AoM_Service/library/testdata/engine.yaml vendored Executable file

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_engine_failing
service: core
alerts:
  slack:
    - '#breel_testing_alerts'
  vo:
    - gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 24
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_engine_failing) by (dc)
tags:
  - dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']

AoM_Service/library/testdata/fuel.yaml vendored Executable file

@@ -0,0 +1,18 @@
---
id: sleeper_agents_milleniumfalcon_fuellevel_low
service: fuel
alerts:
  slack:
    - '#breel_testing_alerts'
  vo:
    - gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_fuellevel_low) by (dc)
tags:
  - dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1

AoM_Service/library/testdata/lightspeed.yaml vendored Executable file

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_lightspeed_unavailable
service: captain
alerts:
  slack:
    - '#breel_testing_alerts'
  vo:
    - gobs-mm
critical_upper_threshold: 1.0
interval: 5
start_time: '-60'
suppressed_occurrences_threshold: 48
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_lightspeed_unavailable) by (dc)
tags:
  - dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['core']

AoM_Service/library/testdata/shields.yaml vendored Executable file

@@ -0,0 +1,20 @@
---
id: sleeper_agents_milleniumfalcon_shields_unavailable
service: core
alerts:
  slack:
    - '#breel_testing_alerts'
  vo:
    - gobs-mm
critical_upper_threshold: 1.0
interval: 5
suppressed_occurrences_threshold: 54
start_time: '-60'
end_time: now
prometheus_url: http://big-trickster.service.eng.consul:9090
query_type: prometheus
query: max(sleeper_agents_milleniumfalcon_shields_unavailable) by (dc)
tags:
  - dc
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
service_dependencies: ['fuel']
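
Together these fixtures double as a schema reference for an alert config: thresholds, query settings, Slack/VictorOps routing, and an optional service_dependencies list naming the services whose alerts should suppress this one (engine and shields depend on fuel, lightspeed on core). They load cleanly with PyYAML from requirements.txt; a quick sanity check, assuming it is run from AoM_Service/library:

import glob

import yaml

# print the dependency edges declared by the test fixtures
for path in sorted(glob.glob("testdata/*.yaml")):
    with open(path) as f:
        cfg = yaml.safe_load(f)
    print(cfg["id"], "service:", cfg.get("service"),
          "depends on:", cfg.get("service_dependencies", []))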

AoM_Service/publish.sh Executable file

@@ -0,0 +1,42 @@
#!/bin/bash
GIT_COMMIT=$(git rev-parse HEAD)
if [[ $GIT_COMMIT == "" ]]; then
    echo "--Missing required GIT_COMMIT var. Aborting..."
    exit 1
fi

# Set up useful vars
team="engvis"
app="alert-on-metrics-app"
registryV2="registry-app.eng.qops.net:5001"
pathV2="${registryV2}/${team}/${app}"
commitV2="${pathV2}:${GIT_COMMIT}"
latestV2="${pathV2}:latest"

# In case you use relative paths
DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$DIR"

echo "--Publishing $app $GIT_COMMIT"
echo "--Removing old images so they don't accumulate"
docker rmi "$latestV2"

# Now fail if anything doesn't work
set -e
if [ -f "$app/build.sh" ]; then
    echo "--Running pre-build steps"
    "$app/build.sh"
fi

docker build --pull=true --tag="$commitV2" --tag "$latestV2" .
echo "--Publishing app container"
docker push "$commitV2"
docker push "$latestV2"

AoM_Service/qvolution.sh Executable file

@@ -0,0 +1,48 @@
function _get_and_save_secret() {
    function is_set() {
        local name="$1"
        eval "echo \$$name" | grep . > /dev/null
    }
    local name="$1"
    eval "$name=\${$name:-}"
    if ! is_set $name; then
        eval "$name=$(security find-generic-password -a $USER -s $name -w 2> /dev/null)"
        if ! is_set "$name"; then
            eval "read -s -p 'Enter $name: ' $name" >&2
            eval "security add-generic-password -a $USER -s $name -w \$$name" >&2
            echo "" >&2
        fi
    fi
    eval "echo \$$name"
}

function get_and_save_secret() {
    _get_and_save_secret "$@" | tail -n 1
}

SENSU_API_USER="$(get_and_save_secret SENSU_API_USER)"
SENSU_API_PASS="$(get_and_save_secret SENSU_API_PASS)"
SLACK_API_TOKEN="$(get_and_save_secret SLACK_API_TOKEN)"
echo SENSU_USER=$SENSU_API_USER >&2
echo SENSU_PASS=$SENSU_API_PASS >&2
echo SLACK_TOKEN=$SLACK_API_TOKEN >&2

git submodule update --remote
rm -rf alert_configs
cp -r AoM_Configs/alert_configs .

docker build -t aom:dev .
docker rm -f aom
docker run \
    -e SLACK_API_TOKEN=${SLACK_API_TOKEN} \
    -e API_USER=$SENSU_API_USER \
    -e API_PASS=$SENSU_API_PASS \
    --rm \
    -d \
    -p 8080:8080 \
    --add-host telegraf:10.4.13.53 \
    --name aom \
    --add-host consul.service.consul:127.0.0.1 \
    -h 127.0.0.1 \
    aom:dev &

until curl localhost:8080/healthcheck; do sleep 1; done
docker logs -f aom

AoM_Service/requirements.txt Executable file

@@ -0,0 +1,7 @@
PyYAML
pip
setuptools
requests
pyaml
sanic
statsd-tags

AoM_Service/run.sh Executable file

@@ -0,0 +1,63 @@
#!/bin/ash
(
    while true; do
        redis-server
        sleep 10
    done
) &
/usr/src/app/echo-server &
/usr/src/app/echo-server -p 443 &
/usr/src/app/consul &

# Default values
KAIROSDB_URL=${KAIROSDB_URL:-http://kairosdb-metrics.service.eng.consul:8080/}
SMTP_SERVER=${SMTP_SERVER:-internal-smtp1-app.eng.qops.net:2525}
#SENSU_URL=${SENSU_URL:-https://sensu-api.eng.qops.net:443/results}
#SLACK_TOKEN=${SLACK_TOKEN:-xoxb-76976722775-WY6vtKAk0SQEb8qcbFkLMV81}
#VICTOROPS_URL=${VICTOROPS_URL:-https://alert.victorops.com/integrations/generic/20131114/alert/07f108fe-9183-45c3-a888-19e1432806c5/}
#CONSUL_URL=${CONSUL_URL:-http://consul1-app.eng.qops.net:8500/v1/kv/service/alert-on-metrics/leader-lock}
#AOM_GRAFANA_URL=${AOM_GRAFANA_URL:-https://grafana.eng.qops.net/d/000000113/alert-on-metrics?refresh=1m&orgId=1&var-dc=All&var-fqdn=All&from=now-6h&to=now&var-id=}
#UCHIWA_URL=${UCHIWA_URL:-https://uchiwa-app.eng.qops.net/#/client/EngOps/AOM}
SLACK_TOKEN=${SLACK_TOKEN:-na}
VICTOROPS_URL=${VICTOROPS_URL:-http://localhost:41912/}
CONSUL_URL=${CONSUL_URL:-http://localhost:41912/}
AOM_GRAFANA_URL=${AOM_GRAFANA_URL:-http://localhost:41912/}
UCHIWA_URL=${UCHIWA_URL:-http://localhost:41912/}
SENSU_URL=${SENSU_URL:-http://localhost:41912}
export AOM_GRAFANA_URL

# Update config
sed -i "s#{{{KAIROSDB_URL}}}#${KAIROSDB_URL}#g" service.yaml
sed -i "s#{{{VICTOROPS_URL}}}#${VICTOROPS_URL}#g" service.yaml
sed -i "s#{{{SLACK_TOKEN}}}#${SLACK_TOKEN}#g" service.yaml
sed -i "s#{{{SMTP_SERVER}}}#${SMTP_SERVER}#g" service.yaml
sed -i "s#{{{CONSUL_URL}}}#${CONSUL_URL}#g" service.yaml
sed -i "s#{{{SENSU_URL}}}#${SENSU_URL}#g" service.yaml
sed -i "s,{{{UCHIWA_URL}}},${UCHIWA_URL},g" service.yaml

# Starting service
if [ -n "${TEST}" ]; then
    sed -i '/alert_reload_interval:/ s/[0-9]\+/30/g' service.yaml
    python3 /usr/src/app/aom_service.py &
    sleep 17
    echo "Making current server leader"
    curl localhost:8080/override?enable=true
    echo "Starting the service"
    curl localhost:8080/healthcheck
    # run the test suite, then inspect its exit status below
    python3 /usr/src/app/aom_test.py
    if [ $? -ne 0 ]; then
        cat /usr/src/app/logs/aom_service.log
        echo "Test failed!"
        exit 1
    else
        cat /usr/src/app/logs/aom_service.log
        echo "Test succeeded. Exiting"
        exit 0
    fi
else
    python3 /usr/src/app/reporter/incoming/main.py &
    exec python3 /usr/src/app/aom_service.py
fi

AoM_Service/service.yaml Executable file

@@ -0,0 +1,27 @@
#=======================#
# All them URLS and tokens
#=======================#
kairosdb_url: "{{{KAIROSDB_URL}}}"
victorops_url: "{{{VICTOROPS_URL}}}"
slack_url: "https://slack.com/api/chat.postMessage"
slack_token: "{{{SLACK_TOKEN}}}"
smtp_server: "{{{SMTP_SERVER}}}"
consul_url: "{{{CONSUL_URL}}}"
sensu_endpoint: "{{{SENSU_URL}}}"
uchiwa_url: "{{{UCHIWA_URL}}}"
#=======================#
# Logging Information
#=======================#
log_path: "logs/aom_service.log"
#=======================#
# alerts configurations
#=======================#
alert_folder: "alert_configs"
alert_routing_lookup: "alert_routing_lookup"
alert_reload_interval: 300
#=======================#
# request timeout value
#=======================#
timeout: 90
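
run.sh fills in the {{{...}}} placeholders with sed before the service starts, so by the time the Python side reads this file it is plain YAML. A minimal read of the rendered file, assuming the service consumes it with PyYAML (which requirements.txt pulls in):

import yaml

with open("service.yaml") as f:
    settings = yaml.safe_load(f)

# a few of the knobs defined above
print(settings["alert_folder"], settings["alert_reload_interval"], settings["timeout"])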

mock/consul/main.go Executable file

@@ -0,0 +1,82 @@
package main

import (
    "flag"
    "fmt"
    "log"
    "net/http"
    "os"
    "strings"
)

func main() {
    // the PORT env var provides the default; the -p flag overrides it
    p := os.Getenv("PORT")
    if p == "" {
        p = "8500"
    }
    flag.StringVar(&p, "p", p, "port to listen on")
    flag.Parse()
    http.Handle("/v1/catalog/service/alert-on-metrics", http.HandlerFunc(catalogService))
    http.Handle("/v1/health/node/127.0.0.1", http.HandlerFunc(healthNode))
    log.Println("Listening on", p)
    if err := http.ListenAndServe(":"+strings.TrimPrefix(p, ":"), nil); err != nil {
        panic(err)
    }
}

func healthNode(w http.ResponseWriter, r *http.Request) {
    fmt.Fprintln(w, `
[
  {
    "CheckID": "check_healthcheck_alert-on-metrics_alert-on-metrics",
    "CreateIndex": 727094265,
    "Definition": {},
    "ModifyIndex": 727094265,
    "Name": "Serf Health Status",
    "Node": "gobs2-nomad.b1-prv.qops.net",
    "Notes": "",
    "Output": "Agent alive and reachable",
    "ServiceID": "",
    "ServiceName": "",
    "ServiceTags": [],
    "Status": "passing"
  }
]
`)
}

func catalogService(w http.ResponseWriter, r *http.Request) {
    fmt.Fprintln(w, `
[
  {
    "Address": "127.0.0.1",
    "CreateIndex": 231035602,
    "Datacenter": "eng",
    "ID": "95dace59-f06b-d483-a06e-38288dc2019a",
    "ModifyIndex": 231035602,
    "Node": "127.0.0.1",
    "NodeMeta": {
      "consul-network-segment": ""
    },
    "ServiceAddress": "",
    "ServiceConnect": {},
    "ServiceEnableTagOverride": false,
    "ServiceID": "alert-on-metrics",
    "ServiceKind": "",
    "ServiceMeta": {},
    "ServiceName": "alert-on-metrics",
    "ServicePort": 8080,
    "ServiceProxy": {},
    "ServiceProxyDestination": "",
    "ServiceTags": [
      ""
    ],
    "ServiceWeights": {
      "Passing": 1,
      "Warning": 1
    },
    "TaggedAddresses": {
      "lan": "127.0.0.1",
      "wan": "127.0.0.1"
    }
  }
]
`)
}
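
The mock serves only the two Consul endpoints the service appears to need: the catalog entry for alert-on-metrics and the health checks for node 127.0.0.1. Once it is running locally, it can be exercised with the requests library from requirements.txt; the port below is an assumption (the mock's default):

import requests

BASE = "http://localhost:8500"  # assumed: the mock's default port

nodes = requests.get(BASE + "/v1/catalog/service/alert-on-metrics").json()
health = requests.get(BASE + "/v1/health/node/127.0.0.1").json()
print(nodes[0]["ServicePort"], health[0]["Status"])  # 8080 passing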

sandbox/isFiringRedis/config Executable file

@@ -0,0 +1,2 @@
save
appendonly no

sandbox/isFiringRedis/dump.rdb Executable file

Binary file not shown.

sandbox/isFiringRedis/main.py Executable file

@@ -0,0 +1,12 @@
def main(args) :
    import redis

    client = redis.Redis()
    k = "key"
    v = "value"
    # the key may not exist on a fresh instance, so guard the first read
    existing = client.get(k)
    print(existing.decode() if existing is not None else None)
    client.set(k, v)
    print(client.get(k).decode())


if __name__ == "__main__" :
    from sys import argv
    main(argv)

sleeper_agents_aom_engine/.gitignore vendored Executable file

@@ -0,0 +1,15 @@
# Created by .ignore support plugin (hsz.mobi)
### Vagrant template
.vagrant/
.idea/
build/results
logs/
*.pyc
.dockerignore
Dockerfile
build/builder
site-packages.tar.gz
alert_configs
AoM_Configs

Some files were not shown because too many files have changed in this diff.