cold
This commit is contained in:
12
AoM_Service/AoM_Configs/.gitignore
vendored
Executable file
12
AoM_Service/AoM_Configs/.gitignore
vendored
Executable file
@@ -0,0 +1,12 @@
|
||||
|
||||
# ignore alert configs starting with underscore -- we can create them while testing the webapp
|
||||
# and not have to worry about them getting into the repo
|
||||
alert_configs/_*.yaml
|
||||
|
||||
*.swp
|
||||
.idea/
|
||||
.vagrant/
|
||||
__pycache__
|
||||
logs/
|
||||
venv/
|
||||
.vscode/
|
||||
68
AoM_Service/AoM_Configs/.jenkins/JenkinsFile
Executable file
68
AoM_Service/AoM_Configs/.jenkins/JenkinsFile
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env groovy
|
||||
// Master-branch pipeline for the AoM configs repository.
// Build and Test are no-ops here (tests already ran on the merge request);
// the Deploy stage publishes and triggers the Rundeck job, but only when the
// build is for origin/master.
pipeline {
    agent { label 'nomad-builder' }

    environment {
        DOCKER_HOST = 'tcp://127.0.0.1:2375'
        WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
    }
    stages {
        stage('Info') {
            steps {
                sh script: 'hostname'
                echo "WORKSPACE_PATH: $WORKSPACE_PATH"
            }
        }
        stage('Build') {
            steps {
                echo "No build required"
            }
        }
        stage('Test') {
            steps {
                echo "Test done already on merge request"
                //sh script: 'cd build; ./test_changed.sh "${WORKSPACE_PATH}"'
                // sh script: 'cd build; ./test_changed.sh'
            }
        }
        stage('Deploy') {
            steps {
                script {
                    if ("$GIT_BRANCH" == "origin/master") {
                        echo "Running publish script"
                        sh script: './publish.sh'
                        echo "Triggering Rundeck job"
                        // Already inside a script block, so the step can be called directly.
                        step([$class: 'RundeckNotifier', includeRundeckLogs: true, jobId: 'c1f0dd4e-89a0-411b-afbb-455421a2ba34', nodeFilters: '', options: '', rundeckInstance: 'team-rundeck -- techops', shouldFailTheBuild: true, shouldWaitForRundeckJob: true, tags: '', tailLog: false])
                    }
                    else {
                        echo "No deploy step required."
                    }
                }
            }
        }
    }
    post {
        success {
            gitlabCommitStatus(name: "$JOB_NAME") {
                // Test passed, update commit with green checkbox
            }
            // Notify Eng Viz of successful build
            // slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
        }
        failure {
            // Send the notifications BEFORE the gitlabCommitStatus block below:
            // error() throws, so any step placed after it in this post
            // condition would not be reached.

            // On failure send an email to Eng Vis.
            // Double quotes are required: Groovy does not interpolate
            // ${...} inside single-quoted strings.
            mail body: "Please check ${BUILD_URL} for details.",
                 subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
                 from: 'Jenkins',
                 to: 'eng-visibility@qualtrics.com'
            // Then send a warning message to the Eng Vis slack channel
            // ('warning' is the named colour; 'warn' is not one).
            slackSend color: 'warning', message: "Failed Build: $BUILD_URL", channel: '#eng-invisibility'

            gitlabCommitStatus(name: "$JOB_NAME") {
                // Test failed, update commit status with red x
                error("Build failed, check ${BUILD_URL} for details.")
            }
        }
    }
}
|
||||
56
AoM_Service/AoM_Configs/.jenkins/JenkinsFileMR
Executable file
56
AoM_Service/AoM_Configs/.jenkins/JenkinsFileMR
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env groovy
|
||||
// Merge-request pipeline for the AoM configs repository.
// Runs the changed-config tests and the YAML validator; nothing is deployed
// from a merge request.
pipeline {
    agent { label 'nomad-builder' }

    environment {
        DOCKER_HOST = 'tcp://127.0.0.1:2375'
        WORKSPACE_PATH = "/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}${WORKSPACE}"
    }
    stages {
        stage('Info') {
            steps {
                sh script: 'hostname'
                echo "WORKSPACE_PATH: $WORKSPACE_PATH"
            }
        }
        stage('Build') {
            steps {
                echo "No build required"
            }
        }
        stage('Test') {
            steps {
                echo "Running test"
                sh script: './test_changed.sh'
                sh script: 'python validate_yaml.py'
            }
        }
        stage('Deploy') {
            steps {
                echo "No deploy step required for Merge Request"
            }
        }
    }
    post {
        success {
            gitlabCommitStatus(name: "$JOB_NAME") {
                // Test passed, update commit with green checkbox
            }
            // Notify Eng Viz of successful build
            // slackSend color: 'good', message: "Passed Build: $BUILD_URL", channel: '#eng-invisibility'
        }
        failure {
            // Send the email BEFORE the gitlabCommitStatus block below:
            // error() throws, so any step placed after it in this post
            // condition would not be reached.

            // On failure send an email to Eng Vis.
            // Double quotes are required: Groovy does not interpolate
            // ${...} inside single-quoted strings.
            mail body: "Please check ${BUILD_URL} for details.",
                 subject: "Jenkins job ${JOB_NAME} build #${BUILD_NUMBER} failed",
                 from: 'Jenkins',
                 to: 'eng-visibility@qualtrics.com'
            // Slack notification is intentionally disabled for merge requests
            // slackSend color: 'warning', message: "Failed Build: $BUILD_URL", channel: '#eng-invisibility'

            gitlabCommitStatus(name: "$JOB_NAME") {
                // Test failed, update commit status with red x
                error("Build failed, check ${BUILD_URL} for details.")
            }
        }
    }
}
|
||||
16
AoM_Service/AoM_Configs/Dockerfile.webapp
Executable file
16
AoM_Service/AoM_Configs/Dockerfile.webapp
Executable file
@@ -0,0 +1,16 @@
|
||||
# Image for the AoM config-generator webapp.
FROM registry-app.eng.qops.net:5001/imported/alpine:3.9
# MAINTAINER is deprecated; the equivalent metadata is a "maintainer" label.
LABEL maintainer="Engineering Visibility <eng-visibility@qualtrics.com>"

COPY webapp_requirements.txt /
COPY run_webapp.sh /

# Runtime packages.
RUN apk add --no-cache python3 curl
# Install the Python requirements with a temporary build toolchain
# (.build-deps) that is removed again in the same layer to keep the image small.
RUN apk add --no-cache --virtual .build-deps build-base python3-dev \
    && pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir --upgrade setuptools \
    && pip3 install --no-cache-dir --upgrade -r /webapp_requirements.txt \
    && apk del .build-deps \
    && rm -rf /var/cache/apk/*

CMD ["/run_webapp.sh"]
|
||||
|
||||
236
AoM_Service/AoM_Configs/README.md
Executable file
236
AoM_Service/AoM_Configs/README.md
Executable file
@@ -0,0 +1,236 @@
|
||||
# README
|
||||
|
||||
This is the new repository for the Alert On Metrics project configurations.
|
||||
|
||||
Alert On Metrics (AOM) project allows one to setup alerts to trigger based on tracking a metric value as collected via [Metrics as a Service](https://odo.corp.qualtrics.com/wiki/index.php/Metrics_As_A_Service). You "track" your metric via a [KairosDB query](http://kairosdb-metrics.service.eng.consul:8080/) or [Prometheus query](http://big-trickster.service.eng.consul:9090/graph) so you are not limited to raw metrics - you can sample based on aggregators available in KairosDB to create new metrics views or use PromQL if you are using Prometheus. Typically people use min, max or count. All "tracked" metrics are rewritten to the metrics data store as a new metric *telgraf.aom_stats_value* but are tagged by Alert-On-Metrics to show their origin.
|
||||
|
||||
You can trigger an alert based on any combination of the following:
|
||||
|
||||
- An upper critical threshold based on the value of a metric increasing
|
||||
- An upper warning threshold based on the value of a metric increasing
|
||||
- A lower critical threshold based on the value of a metric decreasing
|
||||
- A lower warning threshold based on the value of a metric decreasing
|
||||
- Combine any lower and upper threshold to create a 'band'
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Sensu and alert subdue. NEW!
|
||||
|
||||
Some changes have been introduced into latest AOM versions. Now alerts
|
||||
can be sent through Sensu (email not supported yet). Using Sensu also
|
||||
allows you to create check dependencies (vo is now victorops for Sensu).
|
||||
|
||||
```
|
||||
alerts:
|
||||
sensu:
|
||||
victorops:
|
||||
'blackhole'
|
||||
slack:
|
||||
'#aom_test_channel'
|
||||
dependencies:
|
||||
- name_of_check1
|
||||
- name_of_check2
|
||||
```
|
||||
|
||||
Also filters option has been enabled. It works the same way as in
|
||||
Hiera. If you only want to receive critical alerts through one channel
|
||||
you can set "channel"_subdue to **true**.
|
||||
Example:
|
||||
|
||||
```
|
||||
filters:
|
||||
slack_subdue: true
|
||||
victorops_subdue: false
|
||||
```
|
||||
You can make use of anything that sensu api supports. Anything you add
|
||||
to your configuration under sensu will be sent directly to the Sensu API.
|
||||
|
||||
---
|
||||
## Availability metric.
|
||||
|
||||
If you want to track how long your check is in CRITICAL state over a
|
||||
given period of time, you can enable this feature by setting this
|
||||
option to true:
|
||||
```
|
||||
availability: true
|
||||
```
|
||||
|
||||
This will start sending metrics constantly and recording the check
|
||||
output. You can then visualize this metric within the following
|
||||
[dashboard]
|
||||
(https://grafana.eng.qops.net/d/5OsrZSdiz/aom-availability?orgId=1)
|
||||
(or you can create your own).
|
||||
To get a more accurate result don't set the refresh interval lower
|
||||
than 60 seconds.
|
||||
|
||||
---
|
||||
## Routing per tag value. NEW!
|
||||
This feature allows you to configure a different alert routing using the values of tags in your metric. For instance, let's say you want to have a different alert policy for beta, gamma and prod:
|
||||
* *beta*: I want to alert my `#my-project-dev` channel
|
||||
* *gamma*: I want to alert my `#my-project-gamma` channel
|
||||
* *prod*: I want to alert my `#my-project` channel and page the on-call on VictorOps
|
||||
|
||||
We can use the `dc` tag available in the metric query, define specific configuration for beta and gamma, and use a default one for all other values (prod in this case). Everything is configured inside the `alerts` object in the yaml configuration. Instead of directly adding the alert configuration, add a `lookup` key. Inside, you have to provide three values:
|
||||
* `default`: the alert policy to apply by default if we can't find a configuration for a specific combination of tags. The format is the exact same as classic alerts (sensu, vo, slack, etc.).
|
||||
* `tags`: the tags that will be used to lookup the alert routing configuration. You can use more than one tag.
|
||||
* `lookups`: an array, where each element specifies a combination of tag values and the routing to apply in this case.
|
||||
|
||||
Here is the configuration of our example:
|
||||
```yaml
|
||||
alerts:
|
||||
lookup:
|
||||
default:
|
||||
sensu:
|
||||
slack: my-project
|
||||
victorops: my-on-call-key
|
||||
tags:
|
||||
- dc
|
||||
lookups:
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: my-project-dev
|
||||
tags:
|
||||
dc: b1-prv
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: my-project-gamma
|
||||
tags:
|
||||
dc: g1-iad
|
||||
```
|
||||
|
||||
You can move the `lookups` part inside a separate file, so it can be reused across different AOM configurations. To do that, instead of a `lookups` key, provide a `lookup_file` with the filename, including the extension:
|
||||
```yaml
|
||||
alerts:
|
||||
lookup:
|
||||
default: ...
|
||||
lookup_file: my_lookup_file.yaml
|
||||
tags: ...
|
||||
|
||||
```
|
||||
|
||||
Save this file under the `alert_routing_lookup` folder. The syntax for the alert routing is the same as before, it is just in a different file:
|
||||
```yaml
|
||||
---
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: my-project-dev
|
||||
tags:
|
||||
dc: b1-prv
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: my-project-gamma
|
||||
tags:
|
||||
dc: g1-iad
|
||||
```
|
||||
|
||||
---
|
||||
## How do I register a new alert with AOM?
|
||||
|
||||
Alert configurations for AOM are just a Kairos DB or Prometheus query
|
||||
specified in a yaml format and wrapped in some controlling
|
||||
configuration that determines how frequently the query is executed,
|
||||
thresholds, occurrences and where to route the alerts. We have built a
|
||||
small UI that is packaged with the AOM gitlab project that will help
|
||||
you generate a suitable yaml configuration. You can rehearse your
|
||||
queries on the [KairosDB UI]
|
||||
(http://kairosdb-metrics.service.eng.consul:8080/) or at any
|
||||
Prometheus endpoint and take a look at other examples in the alert_configs/ folder for help.
|
||||
|
||||
Follow the instructions below to launch the yaml generator UI on your
|
||||
local desktop and use it to generate a merge request (Docker is
|
||||
necessary).
|
||||
|
||||
1. Clone the project
|
||||
2. cd into the project's directory
|
||||
3. Run the script ./generate_config.sh
|
||||
4. Once up, navigate in a browser to **localhost:80/**
|
||||
5. Fill out the form and click generate
|
||||
6. Hit **Ctrl+C** when you have the alert configuration
|
||||
7. Submit the merge request in a new branch
|
||||
|
||||
---
|
||||
This process starts a local webserver that provides a convenient interface for generating the yaml you need.
|
||||
Most of the fields have helpful info tips on what each value is and how it's used.
|
||||
|
||||
---
|
||||
## Visualization tool [BETA]
|
||||
Along with the project, a simple python script to show how your
|
||||
metrics will look and to help you set the thresholds is
|
||||
provided. This tool requires the installation of python3 and some
|
||||
additional python3 modules:
|
||||
1. yaml
|
||||
2. json
|
||||
3. requests
|
||||
4. numpy
|
||||
5. matplotlib
|
||||
|
||||
These modules should be easy to install using 'pip' or 'homebrew'.
|
||||
|
||||
Usage:
|
||||
```python3 show_config.py [X] alertname_without_yaml_extension```
|
||||
|
||||
Where X is an optional parameter to define the interval length you
|
||||
want to display. It's a multiplier factor, set to 10 by default, that
|
||||
will increase the start_relative (so you will see more datapoints).
|
||||
|
||||
The script should open a window showing the metrics along the defined
|
||||
thresholds. If the query doesn't return any value, it will exit.
|
||||
|
||||
---
|
||||
|
||||
## How does my new alert get to production?
|
||||
|
||||
Once you submit a merge request, a Jenkins job will quickly validate your alert
|
||||
files just checking it contains all required fields and proper syntax. Setting up
|
||||
appropriate thresholds and alerting channels (VictorOps, email,
|
||||
Slack) is user's responsibility.
|
||||
|
||||
If Jenkins returns a PASS result for the test, new alert files will be
|
||||
merged into the master branch and a deploy job will be triggered (also
|
||||
from Jenkins). AOM service will be actively looking for changes in the
|
||||
alert_configs folder and will pick up any changes (by default every
|
||||
300 seconds).
|
||||
|
||||
## Helpful Tidbits
|
||||
|
||||
__IMPORTANT:__ The alert id field must be unique, it might be useful running the
|
||||
grep command within the alert_configs directory to ensure it's not
|
||||
already defined.
|
||||
|
||||
Use the [UI](http://kairosdb-metrics.service.eng.consul:8080/) on the kairosdb box to help you generate / determine the proper query.
|
||||
Remember, you want to get the query down to just one or 2 entries per *group-by* so that the service can quickly iterate over it.
|
||||
|
||||
Once the request has been merged you can check if your query is getting processed by [hitting the url](http://alert-on-metrics.service.eng.consul:8080/healthcheck?verbose=true)
|
||||
|
||||
You can also check out the [grafana dashboard](http://grafana-metrics.service.eng.consul:3000/dashboard/db/alert-on-metrics) that has the results of this service's queries and verify your alert metric is showing up regularly.
|
||||
|
||||
From KairosDB's doc: *You must specify either start_absolute or start_relative but not
|
||||
both. Similarly, you may specify either end_absolute or end_relative
|
||||
but not both. If either end time is not specified the current date and
|
||||
time is assumed.* We suggest the usage of *end_relative* (greater than
|
||||
1 minute) as this will make steadier graphs (if you draw a graph until
|
||||
*Now*, some of the latest metrics could be missing so the end of the
|
||||
graph will be lower than it should).
|
||||
|
||||
We do not recommend using *align_sampling* and *align_start_time*
|
||||
(both false by default so can be skipped) as they might change the alignment of metrics
|
||||
and change graphs over time (*If more than one are set, unexpected results will occur*).
|
||||
|
||||
If you have any doubt about KairosDB's query metrics you can take a look at their documentation [here](https://kairosdb.github.io/docs/build/html/restapi/QueryMetrics.html).
|
||||
|
||||
---
|
||||
|
||||
## The Gotchas
|
||||
|
||||
1. Alerts only fire when KairosDB returns a result. If your KairosDB metric query returns no results for X (currently 10) attempts any active alerts will clear with a message explaining that AOM could not get any further results from KairosDB so user must manually verify RECOVERY. Earlier versions of AOM had no flap protection like this built in. Long term we will move alerting to Sensu which has more advanced built in flap protection. You can reduce flapping of results by building your Kairos query well. Please talk to engineering visibility for help with this.
|
||||
2. Metrics are only collected every 60 seconds, so setting an interval below that will automatically get bumped up to 60 seconds from the web based config generation. Match up the interval by how often the metric is collected and measured
|
||||
3. The Email field only requires a list of names, and not the @qualtrics bit, as it will only send to qualtrics addresses using the internal-smtp1-app.eng.qops.net box
|
||||
4. Email and Slack alerts fire once during an event. This way if an outage was occurring, you wouldn't get flooded with emails and slack alerts the entire time.
|
||||
5. Email and Slack alerts can be helpful to share with the team so they are aware of what is happening.
|
||||
6. Email and Slack alerts can be helpful when trying to figure out your alerts before you VO stuff
|
||||
|
||||
20
AoM_Service/AoM_Configs/alert_configs/engine.yaml
Executable file
20
AoM_Service/AoM_Configs/alert_configs/engine.yaml
Executable file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
id: sleeper_agents_milleniumfalcon_engine_failing
|
||||
service: core
|
||||
alerts:
|
||||
slack:
|
||||
- '#breel_testing_alerts'
|
||||
vo:
|
||||
- gobs-mm
|
||||
critical_upper_threshold: 1.0
|
||||
interval: 5
|
||||
start_time: '-60'
|
||||
suppressed_occurrences_threshold: 24
|
||||
end_time: now
|
||||
prometheus_url: http://big-trickster.service.eng.consul:9090
|
||||
query_type: prometheus
|
||||
query: max(sleeper_agents_milleniumfalcon_engine_failing) by (dc)
|
||||
tags:
|
||||
- dc
|
||||
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
|
||||
service_dependencies: ['fuel']
|
||||
18
AoM_Service/AoM_Configs/alert_configs/fuel.yaml
Executable file
18
AoM_Service/AoM_Configs/alert_configs/fuel.yaml
Executable file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
id: sleeper_agents_milleniumfalcon_fuellevel_low
|
||||
service: fuel
|
||||
alerts:
|
||||
slack:
|
||||
- '#breel_testing_alerts'
|
||||
vo:
|
||||
- gobs-mm
|
||||
critical_upper_threshold: 1.0
|
||||
interval: 5
|
||||
start_time: '-60'
|
||||
end_time: now
|
||||
prometheus_url: http://big-trickster.service.eng.consul:9090
|
||||
query_type: prometheus
|
||||
query: max(sleeper_agents_milleniumfalcon_fuellevel_low) by (dc)
|
||||
tags:
|
||||
- dc
|
||||
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
|
||||
20
AoM_Service/AoM_Configs/alert_configs/lightspeed.yaml
Executable file
20
AoM_Service/AoM_Configs/alert_configs/lightspeed.yaml
Executable file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
id: sleeper_agents_milleniumfalcon_lightspeed_unavailable
|
||||
service: captain
|
||||
alerts:
|
||||
slack:
|
||||
- '#breel_testing_alerts'
|
||||
vo:
|
||||
- gobs-mm
|
||||
critical_upper_threshold: 1.0
|
||||
interval: 5
|
||||
start_time: '-60'
|
||||
suppressed_occurrences_threshold: 48
|
||||
end_time: now
|
||||
prometheus_url: http://big-trickster.service.eng.consul:9090
|
||||
query_type: prometheus
|
||||
query: max(sleeper_agents_milleniumfalcon_lightspeed_unavailable) by (dc)
|
||||
tags:
|
||||
- dc
|
||||
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
|
||||
service_dependencies: ['core']
|
||||
20
AoM_Service/AoM_Configs/alert_configs/shields.yaml
Executable file
20
AoM_Service/AoM_Configs/alert_configs/shields.yaml
Executable file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
id: sleeper_agents_milleniumfalcon_shields_unavailable
|
||||
service: core
|
||||
alerts:
|
||||
slack:
|
||||
- '#breel_testing_alerts'
|
||||
vo:
|
||||
- gobs-mm
|
||||
critical_upper_threshold: 1.0
|
||||
interval: 5
|
||||
suppressed_occurrences_threshold: 54
|
||||
start_time: '-60'
|
||||
end_time: now
|
||||
prometheus_url: http://big-trickster.service.eng.consul:9090
|
||||
query_type: prometheus
|
||||
query: max(sleeper_agents_milleniumfalcon_shields_unavailable) by (dc)
|
||||
tags:
|
||||
- dc
|
||||
url: https://grafana.eng.qops.net/d/000000390/geni?orgId=1
|
||||
service_dependencies: ['fuel']
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
-
|
||||
alert:
|
||||
slack:
|
||||
- "public-api-deploy-tst"
|
||||
tags:
|
||||
canaryTest: transaction_import_distribution_1
|
||||
targetdc: fra1
|
||||
@@ -0,0 +1,365 @@
|
||||
---
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
tags:
|
||||
brandId: aexpfeedback
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: emea-alerts
|
||||
victorops: profserv-19
|
||||
tags:
|
||||
brandId: airbuswea
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alaskaair
|
||||
tags:
|
||||
brandId: alaskaair
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: amdocs
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: americanairlines
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: anz
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: arris
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: emea-alerts
|
||||
victorops: profserv-19
|
||||
tags:
|
||||
brandId: baincx
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: bmocx
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwgroupne
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwgroupnest3
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwjapan
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwjapanst3
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwna
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwnast3
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwvertriebsgmbh
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: bmwvertriebsgmbhst3
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: caterpillar
|
||||
victorops: profserv-14
|
||||
tags:
|
||||
brandId: catcustomerinsights
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: century-link
|
||||
victorops: xmp-seattle-4
|
||||
tags:
|
||||
brandId: centurylink
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-4
|
||||
victorops: xmp-seattle-4
|
||||
tags:
|
||||
brandId: ciscoengineering
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: clientdashboards
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: cms
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: TODO
|
||||
tags:
|
||||
brandId: cocacolaperform
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: dish
|
||||
tags:
|
||||
brandId: dishvoc
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
tags:
|
||||
brandId: dowcorning
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: drtoddhall
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-gs-compare
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: goldmansachs
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: harvard
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: ibm
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: jcibuildings
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: johnsoncontrols2
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: kubota
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: liberty-mutual
|
||||
tags:
|
||||
brandId: libertymutualvoc
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-qe-alerts
|
||||
victorops: es-bmw-marriott
|
||||
tags:
|
||||
brandId: marriottvacationclub
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
tags:
|
||||
brandId: mastercard
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-4
|
||||
victorops: xmp-seattle-4
|
||||
tags:
|
||||
brandId: nielsenapac
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: TODO
|
||||
tags:
|
||||
brandId: optumrx
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-4
|
||||
victorops: xmp-seattle-4
|
||||
tags:
|
||||
brandId: nielsenscarborough
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: rogers
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
tags:
|
||||
brandId: samsungeurope
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: emea-alerts
|
||||
victorops: profserv-19
|
||||
tags:
|
||||
brandId: telenorreporting
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: thermoking
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: philips-es
|
||||
tags:
|
||||
brandId: tnsnipophilips
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: travelers_coord
|
||||
victorops: profserv-14
|
||||
tags:
|
||||
brandId: travelers
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: uhcdr
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: uhcmr
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: uhcgm
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: TODO
|
||||
tags:
|
||||
brandId: uhg
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: uhg1
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: underarmour
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: unum
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: TODO
|
||||
tags:
|
||||
brandId: usaast3
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-sea-automations
|
||||
tags:
|
||||
brandId: usbank
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: es-alerts
|
||||
victorops: profserv
|
||||
tags:
|
||||
brandId: uscd
|
||||
-
|
||||
alert:
|
||||
sensu:
|
||||
slack: xmp-seattle-3
|
||||
victorops: xmp-seattle-3
|
||||
tags:
|
||||
brandId: walkersandbox
|
||||
30
AoM_Service/AoM_Configs/aom_webapp.py
Executable file
30
AoM_Service/AoM_Configs/aom_webapp.py
Executable file
@@ -0,0 +1,30 @@
|
||||
#! /usr/bin/python3
|
||||
# aom_builder.py
|
||||
# point of the builder is to generate a valid yaml config that could be read in to the main app by
|
||||
# asking for clarifying questions on what to check and how to alert on it
|
||||
# this comes from 4 questions:
|
||||
# When to query
|
||||
# What to query for
|
||||
# Whats an alert
|
||||
# Who to Alert
|
||||
|
||||
from webapp import app
|
||||
from library.logger import AlertLogging
|
||||
from library.args import get_builder_args
|
||||
|
||||
# Project logger named 'aom'; started at import time and attached to a log file.
log = AlertLogging('aom')
log.start()
log.start_log_file("logs/aom_builder.log")


if __name__ == "__main__":
    # Parse the command-line options before announcing start-up.
    cli_options = get_builder_args()
    log.info("Logger Initialized")
    log.info("Starting webapp")
    # Serve the webapp on the port requested on the command line.
    app.run(
        host='localhost',
        port=cli_options['port'],
        debug=True,
    )
|
||||
|
||||
|
||||
16
AoM_Service/AoM_Configs/generate_config.sh
Executable file
16
AoM_Service/AoM_Configs/generate_config.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
# Build the webapp image, run it as the "aom_web" container with the current
# directory mounted at /web, and follow its logs until interrupted.

trap ctrl_c INT

# Ctrl+C handler: stop the container, then remove every container whose
# ID/image columns in `docker ps -a` mention aom_web.
function ctrl_c() {
    docker stop aom_web
    docker ps -a | awk '{ print $1,$2 }' | grep aom_web | awk '{print $1 }' | xargs -I {} docker rm {}
}

docker build -f Dockerfile.webapp -t aom_web . && \

# Quote the working directory so checkouts in paths containing spaces still mount.
docker run -d -v "$(pwd)":/web -p80:5000 --name aom_web aom_web && \

docker logs -f aom_web
|
||||
|
||||
|
||||
0
AoM_Service/AoM_Configs/library/__init__.py
Executable file
0
AoM_Service/AoM_Configs/library/__init__.py
Executable file
84
AoM_Service/AoM_Configs/library/args.py
Executable file
84
AoM_Service/AoM_Configs/library/args.py
Executable file
@@ -0,0 +1,84 @@
|
||||
# Contains the arg parser options.
|
||||
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def get_builder_args():
    """
    Define the command-line options of the aom_builder web app and parse them.

    :return: the result of ``args_to_dict`` for the configured parser
             (callers index it by option name, e.g. ``args['port']``)
    """
    parser = argparse.ArgumentParser(
        # Adjacent string literals are concatenated verbatim, so every
        # fragment that continues a sentence must carry its own separator.
        description="Generates a valid yaml file for alerting on metrics. "
                    "If you are familiar with the yaml structure for an alert "
                    "you don't have to use this builder, it's just convenient")
    parser.add_argument('-q', '--query', help="The Kariosdb query string to use")
    parser.add_argument('-i', '--interval', type=int, default=60,
                        help="The interval that the check will run. "
                             "This value is in seconds")
    parser.add_argument('-t', '--threshold', '--upperthreshold',
                        help="The upper threshold is the value that when reached will cause an alert "
                             "depending on the threshold logic. "
                             "Use in conjunction with lower threshold to define a normal band.")
    parser.add_argument('-b', '--lowerthreshold',
                        help="The lower threshold is the value that when reached will cause an alert "
                             "depending on the threshold logic. "
                             "Use in conjunction with upper threshold to define a normal band.")
    parser.add_argument('-m', '--measure', choices=['gt', 'lt', 'eq'],
                        help="The measure to use to compare the "
                             "threshold to the values of the alerts")
    parser.add_argument('-a', '--alert_config', help='A valid Yaml representation of your alerting block')
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_builder run. "
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")

    return args_to_dict(parser)
|
||||
|
||||
def get_tester_service_args():
    """
    Gets arguments passed into aom_tester.py

    Returns: dict mapping each argument name to its parsed value (built by args_to_dict)
    """
    parser = argparse.ArgumentParser(
        description="Parameters to start the alerting on metrics dummy tester service")
    # The trailing space keeps "app" and "[0=Error" from running together in --help.
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_service app "
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-a', '--alert_configs', default=None,
                        help="If provided will override the folder location read from the config with the value passed "
                             "in. Is helpful for testing and troubleshooting alerts")
    parser.add_argument('--hostname', help="If provided, will override the actual hostname check with this value")
    parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")
    return args_to_dict(parser)
|
||||
|
||||
def get_service_args():
    """
    Gets arguments passed into aom_service.py

    Returns: dict mapping each argument name to its parsed value (built by args_to_dict)
    """
    parser = argparse.ArgumentParser(description="Parameters to start the alerting on metrics service")
    # The trailing space keeps "app" and "[0=Error" from running together in --help.
    parser.add_argument('-l', '--log_level', type=int, default=0,
                        help="The log level for the aom_service app "
                             "[0=Error, 1=Info, 2=Debug]")
    parser.add_argument('-a', '--alert_configs', default=None,
                        help="If provided will override the folder location read from the config with the value passed "
                             "in. Is helpful for testing and troubleshooting alerts")
    parser.add_argument('-o', '--override', action='store_true', help="Overrides the check leader election value")
    parser.add_argument('--hostname', help="If provided, will override the actual hostname check with this value")
    parser.add_argument('-p', '--port', type=int, default=8080, help="The port to run the webapp on")
    return args_to_dict(parser)
|
||||
|
||||
|
||||
def args_to_dict(parsed_args):
    """
    Parses the command line with the given parser and returns the result as a dict

    Args:
        parsed_args: the argparse.ArgumentParser to run. Despite its name it has not
            been parsed yet; the name is kept so existing keyword callers still work.
    Returns:
        Dictionary of arguments (argument name -> parsed value)
    """
    try:
        # vars() exposes the Namespace attributes; dict() copies them so the
        # caller gets a plain dict that is independent of the Namespace.
        return dict(vars(parsed_args.parse_args()))
    except argparse.ArgumentError:
        # NOTE(review): parse_args() normally reports bad arguments by exiting on
        # its own, so this branch looks reachable only for parsers that opt out
        # of that behaviour. Kept as a defensive fallback.
        parsed_args.print_help()
        sys.exit(1)
|
||||
22
AoM_Service/AoM_Configs/library/config.py
Executable file
22
AoM_Service/AoM_Configs/library/config.py
Executable file
@@ -0,0 +1,22 @@
|
||||
# config.py
|
||||
import logging
|
||||
import glob
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def glob_the_configs(config_path):
    """
    Loads every ``*.yaml`` file found directly inside ``config_path``

    Args:
        config_path (string): relative path to the configs
    Returns:
        List of configs, one parsed YAML document per file
    """
    alert_list = []
    for config_file in glob.glob(config_path + "/*.yaml"):
        logger.debug("Found {} config".format(config_file))
        # LOAD CONFIG
        # The context manager closes the handle even if parsing fails.
        with open(config_file, 'rb') as handle:
            # NOTE(review): this used to be yaml.load() without a Loader, which can
            # construct arbitrary Python objects and is rejected by newer PyYAML
            # releases. safe_load only builds plain data types; confirm no config
            # relies on python-specific YAML tags.
            alert_list.append(yaml.safe_load(handle))
    logger.info("Loaded {} configs".format(len(alert_list)))
    return alert_list
|
||||
118
AoM_Service/AoM_Configs/library/logger.py
Executable file
118
AoM_Service/AoM_Configs/library/logger.py
Executable file
@@ -0,0 +1,118 @@
|
||||
# logger.py
|
||||
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
|
||||
logging.getLogger('requests').setLevel(logging.ERROR)
|
||||
logging.getLogger('urllib3').setLevel(logging.ERROR)
|
||||
logging.getLogger('werkzeug').setLevel(logging.ERROR)
|
||||
|
||||
|
||||
class SingleLevelFilter(logging.Filter):
    """
    Log filter that either passes only records of one exact level, or passes
    every record except those of that level.
    """

    def __init__(self, passlevel, reject):
        """
        initializer (constructor) of the SingleLevelFilter
        @param passlevel (int) - the int value of the level of the log
        @param reject (bool) - if true, records at passlevel are dropped and all others pass;
                               if false, only records at passlevel pass
        @return SingleLevelFilter object
        @note Sets some object parameters
        """
        # Initialise the base class so the attributes logging.Filter defines exist.
        logging.Filter.__init__(self)
        self.passlevel = passlevel
        self.reject = reject

    def filter(self, record):
        """
        Returns True/False depending on parameters
        @param record (logging.LogRecord) - the record being considered
        @return bool - True when the record should be logged
        @note With reject unset only records of exactly passlevel are logged;
              with reject set everything except passlevel is logged
        """
        same_level = record.levelno == self.passlevel
        return (not same_level) if self.reject else same_level
|
||||
|
||||
|
||||
class AlertLogging(logging.Logger):
    """
    Class Object to handle the logging of the alert on metrics service
    starts at Error level and can flip on (and add) an additional log file and
    Debug logger as needed.
    """

    def __init__(self, name):
        """
        Inits the formatters and logger

        Args:
            name: the logger name passed on to logging.Logger
        """
        self.debug_formatter = logging.Formatter(
            "%(asctime)s - [%(levelname)s] - [%(module)s:%(lineno)d] - %(message)s", "%m-%d %H:%M:%S")

        self.standard_formatter = logging.Formatter("%(asctime)s - [%(levelname)s] - %(message)s",
                                                    "%m-%d %H:%M:%S")
        # Optional handlers; None means "not running". Defining them up front lets
        # the stop_* methods be called safely before the matching start_*.
        self.log_path = None
        self.log_handler = None
        self.debug_handler = None
        logging.Logger.__init__(self, name, logging.DEBUG)
        # Process-wide side effect: loggers created later through
        # logging.getLogger() will be AlertLogging instances too.
        logging.setLoggerClass(AlertLogging)

    def start(self):
        """
        Attaches a stream handler that emits INFO and above using the standard format

        Returns:
            self, so the call can be chained
        """
        info_handler = logging.StreamHandler()
        info_handler.setLevel(logging.INFO)
        info_handler.setFormatter(self.standard_formatter)
        self.addHandler(info_handler)
        return self

    def start_log_file(self, file_path, mode='a'):
        """
        Creates a separate log file handler
        Args:
            file_path: path to the log file
            mode: the type of mode to open the file handler with
        Returns:
            None
        """
        # Replace an already running file handler instead of leaking it.
        self.stop_log_file()
        self.log_path = file_path
        work_folder = os.path.dirname(file_path)
        if work_folder:
            os.makedirs(work_folder, exist_ok=True)
        self.log_handler = logging.FileHandler(file_path, mode)
        self.log_handler.setLevel(logging.DEBUG)
        self.log_handler.setFormatter(self.debug_formatter)
        self.addHandler(self.log_handler)

    def stop_log_file(self):
        """
        Closes Log file and sets the handler to None
        Returns:
            None
        """
        if self.log_handler is None:
            return
        self.removeHandler(self.log_handler)
        self.log_handler.close()
        self.log_handler = None

    def start_debug(self):
        """
        Attaches a stream handler that emits only DEBUG records using the debug format

        Returns:
            None
        """
        # Replace an already running debug handler instead of stacking a second one.
        self.stop_debug()
        self.debug_handler = logging.StreamHandler()
        self.debug_handler.setLevel(logging.DEBUG)
        self.debug_handler.addFilter(SingleLevelFilter(logging.DEBUG, False))
        self.debug_handler.setFormatter(self.debug_formatter)
        self.addHandler(self.debug_handler)

    def stop_debug(self):
        """
        stop the debugger
        Returns:
            None
        """
        if self.debug_handler is None:
            return
        self.removeHandler(self.debug_handler)
        self.debug_handler = None
|
||||
42
AoM_Service/AoM_Configs/publish.sh
Executable file
42
AoM_Service/AoM_Configs/publish.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
# Builds the app image from this directory, tags it with the current git commit
# and "latest", and pushes both tags to the registry.

GIT_COMMIT=$(git rev-parse HEAD)

if [[ -z "$GIT_COMMIT" ]]; then
  echo "--Missing required GIT_COMMIT var. Aborting..."
  exit 1
fi

#Setup useful vars
team="engvis"
app="alert-on-metrics-configs"

registryV2="registry-app.eng.qops.net:5001"
pathV2="${registryV2}/${team}/${app}"
commitV2="${pathV2}:${GIT_COMMIT}"
latestV2="${pathV2}:latest"

# In case you use relative paths
# ${BASH_SOURCE[0]} needs the braces: $BASH_SOURCE[0] expands to the script path
# followed by a literal "[0]".
DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$DIR" || exit 1

echo "--Publishing $app $GIT_COMMIT"

echo "--Removing old image, so they don't accumulate"
# Best effort: runs before `set -e`, so a missing image does not abort the publish.
docker rmi "$latestV2"

#Now fail if anything doesn't work
set -e

if [ -f "$app/build.sh" ]
then
  echo "--Running pre build steps"
  "$app/build.sh"
fi

docker build --pull=true --tag="$commitV2" --tag "$latestV2" .

echo "--Publishing app container"

docker push "$commitV2"
docker push "$latestV2"
|
||||
6
AoM_Service/AoM_Configs/run.sh
Executable file
6
AoM_Service/AoM_Configs/run.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Mirror the alert configs and the routing lookup into the mount point.
# --delete removes files from the destination that no longer exist in the source.

rsync --archive --delete /alert_configs/ /mountpoint/configs/git/
rsync --archive --delete /alert_routing_lookup/ /mountpoint/alert_routing_lookup/

# Show what ended up in the config destination.
ls -l /mountpoint/configs/git/
|
||||
5
AoM_Service/AoM_Configs/run_webapp.sh
Executable file
5
AoM_Service/AoM_Configs/run_webapp.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/ash
# Starts the Flask development server for the yaml builder webapp.
export FLASK_APP=/web/aom_webapp.py
# NOTE(review): debug mode enables the interactive debugger, which lets anyone who
# can reach the server run code, and the server listens on all interfaces below.
# The default stays 1 as before, but it can now be switched off with FLASK_DEBUG=0.
export FLASK_DEBUG="${FLASK_DEBUG:-1}"

# Do not start the app from the wrong working directory if /web is missing.
cd /web || exit 1
flask run --host=0.0.0.0
|
||||
25
AoM_Service/AoM_Configs/service.yaml
Executable file
25
AoM_Service/AoM_Configs/service.yaml
Executable file
@@ -0,0 +1,25 @@
|
||||
#=======================#
|
||||
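# Service URLs and tokens
# NOTE(review): the VictorOps integration key and the Slack token below look like
# real credentials; consider supplying them at deploy time instead of committing them.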
|
||||
#=======================#
|
||||
kairosdb_url: "http://kairosdb-metrics.service.eng.consul:8080/"
|
||||
victorops_url: "https://alert.victorops.com/integrations/generic/20131114/alert/07f108fe-9183-45c3-a888-19e1432806c5/"
|
||||
slack_url: "https://slack.com/api/chat.postMessage"
|
||||
slack_token: "xoxb-76976722775-WY6vtKAk0SQEb8qcbFkLMV81"
|
||||
smtp_server: "internal-smtp1-app.eng.qops.net:2525"
|
||||
consul_url: "http://consul1-app.eng.qops.net:8500/v1/kv/service/alert-on-metrics/leader-lock"
|
||||
sensu_endpoint: "https://sensu-api.eng.qops.net:443/results"
|
||||
|
||||
#=======================#
|
||||
# Logging Information
|
||||
#=======================#
|
||||
log_path: "logs/aom_service.log"
|
||||
|
||||
#=======================#
|
||||
# alerts folder
|
||||
#=======================#
|
||||
alert_folder: "alert_configs"
|
||||
|
||||
#=======================#
|
||||
# request timeout value
|
||||
#=======================#
|
||||
timeout: 90
|
||||
104
AoM_Service/AoM_Configs/show_config.py
Executable file
104
AoM_Service/AoM_Configs/show_config.py
Executable file
@@ -0,0 +1,104 @@
|
||||
import glob
|
||||
import yaml
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import re
|
||||
import requests
|
||||
import numpy
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
import datetime
|
||||
import random
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
#from pdb import set_trace as bp
|
||||
|
||||
# Seconds to wait for the KairosDB query before giving up.
timeout = 180
kairosdb_url = "http://kairosdb-metrics.service.eng.consul:8080/"
# Matches config keys ending in "_threshold", except those starting with "occurrences".
myRe = re.compile('^(?!occurrences).*_threshold$')


def _to_local_datetime(epoch_ms):
    """Convert an epoch-milliseconds timestamp to a naive local datetime (whole seconds)."""
    return datetime.datetime.fromtimestamp(int(epoch_ms / 1000))


def load_alert_config(alert_name, intervals):
    """
    Load alert_configs/<alert_name>.yaml and widen its query window.

    Args:
        alert_name: config file name without the .yaml extension
        intervals: factor by which the query's start_relative value is multiplied
    Returns:
        The parsed alert config dict with the adjusted query
    """
    config_file = 'alert_configs/' + alert_name + '.yaml'
    with open(config_file, 'rb') as handle:
        # NOTE(review): this used to be yaml.load() without a Loader, which can build
        # arbitrary Python objects and is rejected by newer PyYAML releases.
        alert_config = yaml.safe_load(handle)
    start_relative = alert_config['query']['start_relative']
    start_relative['value'] = str(int(start_relative['value']) * intervals)
    return alert_config


def fetch_results(query):
    """
    POST the query to KairosDB and return the result list of its first query.

    Raises:
        requests.HTTPError: when the server answers with an error status
    """
    query_url = kairosdb_url + "api/v1/datapoints/query"
    # timeout must be passed by keyword: the third positional parameter of
    # requests.post is `json`, so the old positional call never applied a timeout.
    ret = requests.post(query_url, data=json.dumps(query), timeout=timeout)
    ret.raise_for_status()
    return ret.json()['queries'][0]['results']


def plot_results(title, results, alert_config):
    """
    Plot one line per result series plus a dashed red line per configured threshold.

    Args:
        title: text shown as the plot title
        results: list of result dicts, each with 'values' ([epoch_ms, value] pairs) and 'group_by'
        alert_config: the alert config; its *_threshold keys are drawn as horizontal lines
    """
    # x position for the threshold labels: first timestamp of the last plotted series.
    label_x = None
    for result in results:
        points = result['values']
        if not points:
            # Nothing to draw for an empty series.
            continue
        timestamps = [_to_local_datetime(point[0]) for point in points]
        values = [point[1] for point in points]
        label_str = str(result['group_by'][0].get('group', ''))
        line_color = tuple(numpy.random.random(size=3))
        plt.plot_date(timestamps, values, marker='.', color=line_color, linestyle='-', label=label_str)
        label_x = timestamps[0]

    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    plt.title(title)
    plt.legend()

    # Adding thresholds to the graph
    for key in alert_config:
        if myRe.match(key):
            level = float(alert_config[key])
            plt.axhline(y=level, color='r', linestyle='--', label=str(key))
            if label_x is not None:
                plt.text(label_x, level, key)

    plt.gcf().autofmt_xdate(rotation=25)
    plt.show()


def main(argv):
    """
    Entry point: show_config.py <alert_name> [intervals]

    Args:
        argv: the full argument vector (argv[0] is the script name)
    Returns:
        Process exit status (0 on success, 1 when no alert name was given)
    """
    # if no argument print help and exit
    if len(argv) == 1:
        print("You need to specify an alert config file.")
        return 1
    alert_name = argv[1]
    # We will show 10 intervals by default
    intervals = int(argv[2]) if len(argv) == 3 else 10

    alert_config = load_alert_config(alert_name, intervals)
    results = fetch_results(alert_config['query'])
    plot_results(alert_name, results, alert_config)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
30
AoM_Service/AoM_Configs/test_changed.sh
Executable file
30
AoM_Service/AoM_Configs/test_changed.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
# Merge gate: every file changed by GIT_COMMIT must live in alert_configs/ or
# alert_routing_lookup/, and a changed alert must not reuse an existing id.
set -x

if [ -z "$GIT_COMMIT" ]; then
  echo "Expected env var 'GIT_COMMIT' to be set. Exiting..."
  exit 1
fi

echo "Check that only alert confings are being pushed"
echo "$PWD"

# Fail loudly if the diff itself cannot be computed instead of passing on an empty list.
changed_files=$(git diff-tree -r --name-only "${GIT_COMMIT}^1" "${GIT_COMMIT}") || exit 1

# Check EVERY changed file. The previous version exited 0 on the first acceptable
# file, so later files in the same commit were never checked.
while IFS= read -r file; do
  [ -n "$file" ] || continue

  # Files deleted by the commit have no content to inspect.
  new_id=""
  if [ -f "$file" ]; then
    new_id=$(grep '^id:' "$file")
  fi
  if [ -n "$new_id" ]; then
    # NOTE(review): this is a substring match, so "id: foo" also counts "id: foobar".
    total_id=$(grep -- "$new_id" alert_configs/*.yaml | wc -l)
    if [ "$total_id" -gt 1 ] ; then
      echo "Duplicated id found! Please update the id of the alert configuration"
      exit 1
    fi
  fi

  dir=$(dirname "$file")
  # Only changes directly under alert_configs/ or alert_routing_lookup/ may merge automatically
  if [ "$dir" != "alert_configs" ] && [ "$dir" != "alert_routing_lookup" ] ; then
    echo "Only automatic merges allowed for alert config files"
    exit 1
  fi
done <<< "$changed_files"

echo "Good to merge"
exit 0
|
||||
60
AoM_Service/AoM_Configs/validate_yaml.py
Executable file
60
AoM_Service/AoM_Configs/validate_yaml.py
Executable file
@@ -0,0 +1,60 @@
|
||||
import yaml
|
||||
import glob
|
||||
|
||||
def _require(condition, message):
    """Raise ValueError(message) unless condition holds (unlike assert, this survives python -O)."""
    if not condition:
        raise ValueError(message)


def validate_config(config):
    """
    Check one parsed alert config.

    Args:
        config: dict loaded from an alert yaml file
    Raises:
        ValueError: when a rule is violated; the message names the rule
        KeyError / IndexError / TypeError: when a required key or structure is missing
    """
    _require(len(config['alerts']) > 0, "No Alerts configured, this is a dead config")
    _require(len(config['query']) > 0, "No Query, this is a dead config")
    _require(config['interval'] >= 30, "Intervals less than 30 are invalid")
    _require(len(config['id']) > 0, "Alert ID is empty, this is a dead config")
    if config.get('query_type') == 'prometheus':
        _require(isinstance(config['query'], str), "Invalid Prometheus query")
        _require("$" not in config['query'], "Prometheus query should not contain variables")
    else:
        _require(isinstance(config['query'], dict), "Kairosdb Query string cannot be validated as proper JSON")
        metric = config['query']['metrics'][0]
        # The tag set is not consumed yet (the undefined-tag warning is disabled), but
        # building it still rejects queries without the expected metrics/tags structure.
        defined_tags = set(metric['tags'].keys()).union({'', 'dc', 'fqdn'})
        # IF THERE IS AGGREGATION WE HAVE TO ADD THESE TAGS
        if 'group_by' in metric:
            defined_tags.update(set(metric['group_by'][0]['tags']))

    # OUR MINIMUM THRESHOLD NEED
    threshold_keys = ('critical_lower_threshold', 'critical_upper_threshold',
                      'warning_lower_threshold', 'warning_upper_threshold')
    _require(any(key in config for key in threshold_keys),
             "Config must have at least one threshold set.")

    # JUST MAKE SURE YOU ARE NOT DOING SOMETHING STUPID WITH WARNING COMING AFTER CRITICAL
    if 'warning_lower_threshold' in config and 'critical_lower_threshold' in config:
        _require(config['critical_lower_threshold'] < config['warning_lower_threshold'],
                 "Lower Critical must be less than Lower Warning")
    if 'warning_upper_threshold' in config and 'critical_upper_threshold' in config:
        _require(config['critical_upper_threshold'] > config['warning_upper_threshold'],
                 "Upper Critical must be greater than Upper Warning")

    if 'occurrences_threshold' in config:
        # The accepted range is unchanged (>= 1); only the message was corrected,
        # because the old text described a different limit than the check enforces.
        _require(config['occurrences_threshold'] >= 1,
                 "occurrences_threshold must be at least 1")


def main():
    """
    Validate every ./alert_configs/*.yaml file and report the bad ones.

    Returns:
        1 when at least one config is invalid, otherwise 0
    """
    alert_list = []
    bad_alert_list = []
    print("Collecting all yaml configs")
    # COLLECT CONFIG FILES
    for config_file in glob.glob("./alert_configs/*.yaml"):
        print("Found {} config".format(config_file))
        alert_list.append(config_file)
    print("Validating all yaml configs")
    # PARSE CONFIG FILES AND VALIDATE THEIR VALUES
    for alert in alert_list:
        print("Validating file {}".format(alert))
        try:
            with open(alert, 'rb') as handle:
                # NOTE(review): this used to be yaml.load() without a Loader, which can build
                # arbitrary Python objects and is rejected by newer PyYAML releases.
                config = yaml.safe_load(handle)
            validate_config(config)
        except Exception as e:
            # Deliberately broad: any failure while reading or checking marks the file as bad.
            print("Invalid config file: {}\n{}".format(alert, str(e)))
            bad_alert_list.append("{}\n{}".format(alert, str(e)))
    for alert in bad_alert_list:
        print("Config is bad: {}".format(alert.replace('\n', ' ')))
    return 1 if bad_alert_list else 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
7
AoM_Service/AoM_Configs/webapp/__init__.py
Executable file
7
AoM_Service/AoM_Configs/webapp/__init__.py
Executable file
@@ -0,0 +1,7 @@
|
||||
import os

from flask import Flask, render_template, request, session

app = Flask(__name__)
app.config['SESSION_TYPE'] = 'filesystem'
# NOTE(review): the session-signing key was a constant committed to the repo. The old
# value remains the fallback so existing deployments keep working, but a real secret
# can now be supplied through the AOM_SECRET_KEY environment variable.
app.config['SECRET_KEY'] = os.environ.get('AOM_SECRET_KEY', 'super secret key')

# Imported at the bottom, after `app` exists; presumably webapp.views needs `app`
# from this package to register its routes - confirm against views.py.
import webapp.views
|
||||
|
||||
139
AoM_Service/AoM_Configs/webapp/render.py
Executable file
139
AoM_Service/AoM_Configs/webapp/render.py
Executable file
@@ -0,0 +1,139 @@
|
||||
import yaml
|
||||
import os
|
||||
import json
|
||||
import traceback
|
||||
import sys
|
||||
from library.logger import AlertLogging
|
||||
|
||||
logger = AlertLogging('aom')
|
||||
logger.start()
|
||||
|
||||
|
||||
# (form field, yaml key) pairs copied as floats, in order. The legacy un-prefixed
# fields come first so the explicit *_upper_* field wins when both are present.
_THRESHOLD_FIELDS = (
    ('critical_threshold', 'critical_upper_threshold'),
    ('critical_upper_threshold', 'critical_upper_threshold'),
    ('warning_threshold', 'warning_upper_threshold'),
    ('warning_upper_threshold', 'warning_upper_threshold'),
    ('critical_lower_threshold', 'critical_lower_threshold'),
    ('warning_lower_threshold', 'warning_lower_threshold'),
)


def _split_csv(raw):
    """Split a comma separated form value into a list, dropping empty items."""
    return [item for item in raw.split(',') if item]


def render_config(config):
    """
    Reads in the config dict and renders to file. config usually from web interface
    Args:
        config: The config to use to generate the yaml file
    Returns:
        Tuple (status, text): (0, rendered yaml string) on success,
        (1, error description) on failure
    """
    try:
        alert_name = config['alert_name']
        # The name comes from the web form and becomes a file name: refuse anything
        # that is empty or contains a path component so nothing is written outside
        # of alert_configs.
        if not alert_name or os.path.basename(alert_name) != alert_name:
            return 1, "Invalid alert name: {!r}".format(alert_name)
        # GET THE NAME OF THE FILE FROM THE CONFIG
        file_name = ''.join([alert_name, '.yaml'])
        logger.debug("Filename: {}".format(file_name))
        # THIS SHOULD BE A PARAMETER PASSED IN
        file_path = os.path.join('alert_configs', file_name)
        logger.debug("Full path: {}".format(file_path))
        # SANITIZE THE CONFIG TO A NEW OBJECT
        # SET THE INTERVAL TO lowest value of 30 seconds
        yaml_config = {'alerts': {},
                       'id': alert_name,
                       'interval': max(30, int(config['interval']))}
        # SPLIT THE ALERTS INTO A LIST
        for channel in ('vo', 'email', 'slack'):
            if channel in config:
                yaml_config['alerts'][channel] = _split_csv(config[channel + '_list'])
        # GET THRESHOLDS AS FLOATS
        # Blank fields are skipped for every threshold. Previously the legacy
        # critical_threshold branch tested the wrong key, and blank warning
        # thresholds crashed in float('').
        for form_key, yaml_key in _THRESHOLD_FIELDS:
            raw = config.get(form_key, "")
            if raw != "":
                yaml_config[yaml_key] = float(raw)
        if 'occurrences' in config:
            yaml_config['occurrences_threshold'] = int(config['occurrences_threshold'])
        # PARSE THE QUERY OUT INTO A DICT OBJECT
        if config['prometheus_query']:
            yaml_config['query_type'] = 'prometheus'
            yaml_config['prometheus_url'] = config['prometheus_url']
            yaml_config['query'] = config['prometheus_query']
            yaml_config['start_time'] = config['start_time']
            yaml_config['end_time'] = config['end_time']
        else:
            yaml_config['query_type'] = 'kairosdb'
            yaml_config['query'] = json.loads(config['kairosdb_query'])
        # GET THE TAGS, COMMA SEPARATED
        yaml_config['tags'] = _split_csv(config['tags'])
        # GET THE URL
        yaml_config['url'] = config['url']
        # WRITE TO FILE
        yaml_str = yaml.dump(yaml_config, default_flow_style=False, explicit_start=True)
        with open(file_path, 'w') as f:
            f.write(yaml_str)
        return 0, yaml_str
    except json.decoder.JSONDecodeError:
        # format_exc() describes the JSON error itself; format_stack() only listed
        # the current call stack and never mentioned what was wrong with the query.
        return 1, "Query string is not valid json: {}".format(traceback.format_exc())

    except Exception as e:
        # Broad on purpose: any failure is reported back to the web page as text.
        logger.error("Unable to render yaml config file to disk")
        _, _, ex_traceback = sys.exc_info()
        return 1, render_traceback(e, ex_traceback)
|
||||
|
||||
|
||||
def render_yaml(alert_id):
    """
    Reads in a yaml file into the config that the web expects.
    Args:
        alert_id: the name of the config (file name without the .yaml extension)
    Returns:
        Dictionary of form field names to values
    Raises:
        OSError: when alert_configs/<alert_id>.yaml cannot be read
        KeyError: when the config lacks a required key (id, interval, url, alerts, query)
    """
    file_name = ''.join([alert_id, '.yaml'])
    file_path = os.path.join('alert_configs', file_name)
    with open(file_path, 'r') as handle:
        # NOTE(review): this used to be yaml.load() without a Loader, which can build
        # arbitrary Python objects and is rejected by newer PyYAML releases.
        config = yaml.safe_load(handle)
    yaml_config = dict()
    yaml_config['alert_name'] = config['id']
    yaml_config['interval'] = config['interval']
    # (yaml key, form field) in copy order; the legacy un-prefixed keys come first
    # so the explicit *_upper_* key wins when a file contains both.
    threshold_fields = (
        ('critical_threshold', 'critical_upper_threshold'),
        ('critical_upper_threshold', 'critical_upper_threshold'),
        ('critical_lower_threshold', 'critical_lower_threshold'),
        ('warning_threshold', 'warning_upper_threshold'),
        ('warning_upper_threshold', 'warning_upper_threshold'),
        ('warning_lower_threshold', 'warning_lower_threshold'),
        ('occurrences_threshold', 'occurrences_threshold'),
    )
    for yaml_key, form_key in threshold_fields:
        if yaml_key in config:
            yaml_config[form_key] = config[yaml_key]
    yaml_config['url'] = config['url']
    # Each configured channel becomes a ticked checkbox plus a comma separated list.
    for channel in ('email', 'vo', 'slack'):
        if channel in config['alerts']:
            yaml_config[channel] = 'on'
            yaml_config[channel + '_list'] = ','.join(config['alerts'][channel])
    if 'tags' in config:
        yaml_config['tags'] = ','.join(config['tags'])
    if config.get('query_type') == 'prometheus':
        yaml_config['prometheus_query'] = config['query']
        yaml_config['prometheus_url'] = config['prometheus_url']
        yaml_config['start_time'] = config['start_time']
        yaml_config['end_time'] = config['end_time']
    else:
        # The KairosDB query is stored as a dict; the form edits it as pretty-printed JSON.
        yaml_config['kairosdb_query'] = json.dumps(config['query'], sort_keys=True, indent=4, separators=(',', ': '))
    return yaml_config
|
||||
|
||||
|
||||
def render_traceback(ex, ex_traceback):
    """
    Logs the exception and returns its formatted traceback as one string.
    Args:
        ex: the exception instance
        ex_traceback: the traceback object belonging to ex
    Returns:
        The traceback text, as traceback.print_exception would print it
    """
    tb_lines = traceback.format_exception(ex.__class__, ex, ex_traceback)
    logger.exception("Exception")
    # format_exception() lines already end in a newline, so they are joined with ''
    # ('\n'.join inserted a blank line between every entry).
    return ''.join(tb_lines)
|
||||
14
AoM_Service/AoM_Configs/webapp/static/bootstrap-theme.min.css
vendored
Executable file
14
AoM_Service/AoM_Configs/webapp/static/bootstrap-theme.min.css
vendored
Executable file
File diff suppressed because one or more lines are too long
14
AoM_Service/AoM_Configs/webapp/static/bootstrap.min.css
vendored
Executable file
14
AoM_Service/AoM_Configs/webapp/static/bootstrap.min.css
vendored
Executable file
File diff suppressed because one or more lines are too long
11
AoM_Service/AoM_Configs/webapp/static/bootstrap.min.js
vendored
Executable file
11
AoM_Service/AoM_Configs/webapp/static/bootstrap.min.js
vendored
Executable file
File diff suppressed because one or more lines are too long
29
AoM_Service/AoM_Configs/webapp/static/style.css
Executable file
29
AoM_Service/AoM_Configs/webapp/static/style.css
Executable file
@@ -0,0 +1,29 @@
|
||||
/* Base typography and page frame for the yaml builder pages. */
body            { font-family: sans-serif; background: #eee; }
a, h1, h2       { color: #377BA8; }
h1, h2          { font-family: 'Georgia', serif; margin: 0; }
h1              { border-bottom: 2px solid #eee; }
h2              { font-size: 1.2em; }

.page           { margin: 2em auto; width: 45em; border: 5px solid #ccc;
                  padding: 0.8em; background: white; }
.entries        { list-style: none; margin: 0; padding: 0; }
.entries li     { margin: 0.8em 1.2em; }
.entries li h2  { margin-left: -1em; }
.add-entry      { font-size: 0.9em; border-bottom: 1px solid #ccc; }
.add-entry dl   { font-weight: bold; }
.metanav        { text-align: right; font-size: 0.8em; padding: 0.3em;
                  margin-bottom: 1em; background: #fafafa; }
.flash          { background: #CEE5F5; padding: 0.5em;
                  border: 1px solid #AACBE2; }
.error          { background: #F0D6D6; padding: 0.5em; }
/*
   Disabled button style. This used to be wrapped in "/#" and "#/", which is not
   CSS comment syntax: the stray tokens made the following .container rule
   invalid as well, so it was silently dropped.
.button         { border-top: 2px solid #a3ceda;
                  border-left: 2px solid #a3ceda;
                  border-right: 2px solid #4f6267;
                  border-bottom: 2px solid #4F6267;
                  padding: 1px 20px !important;
                  font-size: 14px !important;
                  background-color: #CEE5F5;
                  font-weight: bold;
                  color: #2d525d; }
*/
.container      { width: 500px; clear: both;}
|
||||
28
AoM_Service/AoM_Configs/webapp/templates/debug.html
Executable file
28
AoM_Service/AoM_Configs/webapp/templates/debug.html
Executable file
@@ -0,0 +1,28 @@
|
||||
{# Debug view: echoes the submitted form fields and the rendered yaml file. #}
{% extends "header.html" %}
{% block body %}
    <h2>Form Elements</h2><br />
    <table>
        {% for key, value in query.items() %}
        <tr>
            <th> {{ key }} </th>
            <td> {{ value }} </td>
        </tr>
        {% endfor %}
    </table><br/>
    <p>
        {{ query.alert_name }}
    </p>
    <h2>Rendered Config File</h2><br />
    <p>{{ file_path }}</p>
    {# The lines are wrapped in a <div> because a <p> may not contain <div> elements. #}
    {# NOTE(review): `|safe` turns off autoescaping for text derived from the submitted form; confirm the view escapes it. #}
    <div>
        {% for line in file_contents %}
        <div>{{ line|safe }}</div>
        {% endfor %}
    </div>
    <br />
    <form action="{{ url_for('re_build', alert_id=query.alert_name) }}" id="re_build" method="post">
        <p>
            <input type="submit" id="submit" class="btn btn-primary" value="Return to Form?">
        </p>
    </form>
{% endblock %}
|
||||
6
AoM_Service/AoM_Configs/webapp/templates/error.html
Executable file
6
AoM_Service/AoM_Configs/webapp/templates/error.html
Executable file
@@ -0,0 +1,6 @@
|
||||
{% extends "header.html" %}
|
||||
{% block body %}
|
||||
<h1>Error Rendering config:</h1>
|
||||
<p>{{ message }}</p>
|
||||
<p><a href="{{ url_for('index') }}">Return to Creation Page?</a></p>
|
||||
{% endblock %}
|
||||
67
AoM_Service/AoM_Configs/webapp/templates/header.html
Executable file
67
AoM_Service/AoM_Configs/webapp/templates/header.html
Executable file
@@ -0,0 +1,67 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-COMPATIBLE" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Alerting On Metrics Yaml Builder</title>
|
||||
<link rel=stylesheet type=text/css href="{{ url_for('static', filename='bootstrap.min.css') }}">
|
||||
<link rel="stylesheet" type=text/css href="{{ url_for('static', filename='style.css') }}">
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
|
||||
<script src="{{ url_for('static', filename='bootstrap.min.js') }}"></script>
|
||||
<script type="text/javascript">
|
||||
/**
 * Adds or removes the "<name>_list" text input that belongs to a checkbox.
 * When checked, a text input is appended to the element "insertinputs_<name>";
 * when unchecked, that input is removed again.
 * @param {HTMLInputElement} cbox - the checkbox that was toggled
 */
function dynInput(cbox) {
    var listId = cbox.name + "_list";
    var existing = document.getElementById(listId);
    if (cbox.checked) {
        // Do not stack a second input if one is already present.
        if (existing) {
            return;
        }
        var input = document.createElement("input");
        input.type = "text";
        input.id = listId;
        input.name = listId;
        document.getElementById("insertinputs_" + cbox.name).appendChild(input);
    } else if (existing) {
        // parentNode.removeChild also works in browsers without Element.remove(),
        // and the guard avoids a TypeError when there is nothing to remove.
        existing.parentNode.removeChild(existing);
    }
}
|
||||
|
||||
/**
 * Enables the "#<name>_list" element while the checkbox is checked and disables
 * it otherwise. Does nothing when no such element exists.
 * @param {HTMLInputElement} cbox - the checkbox that was toggled
 */
function dynEnable(cbox) {
    var target = $("#" + cbox.name + "_list")[0];
    if (target) {
        target.disabled = !cbox.checked;
    }
}
|
||||
|
||||
/**
 * Keeps the "#<name>_threshold" element enabled exactly while the checkbox is checked.
 * @param {HTMLInputElement} cbox - the checkbox that was toggled
 */
function dynThreshold(cbox) {
    var selector = "#" + cbox.name + "_threshold";
    $(selector)[0].disabled = !cbox.checked;
}
|
||||
|
||||
|
||||
/**
 * Normalises the field's text: lower-cases it and turns every space into "_".
 * @param {{value: string}} strInput - the input element to rewrite in place
 */
function forceLower(strInput){
    // A string pattern only replaces the first match; the /g regex covers text
    // that contains several spaces at once.
    strInput.value = strInput.value.toLowerCase().replace(/ /g, "_");
}
|
||||
|
||||
/**
 * Turns every space in the field's text into a comma.
 * @param {{value: string}} strInput - the input element to rewrite in place
 */
function forceComma(strInput){
    // A string pattern only replaces the first match; the /g regex covers text
    // that contains several spaces at once.
    strInput.value = strInput.value.replace(/ /g, ",");
}
|
||||
|
||||
/**
 * Raises the field to 2 when it holds an integer of 1 or less.
 * Non-numeric text is left untouched (the comparison with NaN is false).
 * @param {{value: (string|number)}} strInput - the input element to rewrite in place
 */
function forcePositive(strInput){
    // Explicit radix: the field is always read as a decimal number.
    if (parseInt(strInput.value, 10) <= 1) {
        strInput.value = 2;
    }
}
|
||||
|
||||
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class=page>
|
||||
{% block body %}{% endblock %}
|
||||
966
AoM_Service/AoM_Configs/webapp/templates/index.html
Executable file
966
AoM_Service/AoM_Configs/webapp/templates/index.html
Executable file
@@ -0,0 +1,966 @@
|
||||
{% extends "header.html" %}
|
||||
{% block body %}
|
||||
<form action="{{url_for('index')}}" id="builder" method="post" class="form-horizontal">
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Alert Meta</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Alert Name -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label for="alert_name" class="control-label">Alert Name:</label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#alertidModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="alertidModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="myModalLabel">Alert Name</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>The alert name acts as both the name of the .yaml file and the id for the alert. The
|
||||
alert name becomes part of what shows up in the title / subject when an alert is
|
||||
triggered</p>
|
||||
<p>Picking an alert name that already exists will overwrite the .yaml configuration file so
|
||||
be aware of what you choose</p>
|
||||
<p>The Alert name is also how this alert will show up in Victorops, Slack and Email
|
||||
(Depending on what options you choose for the Alerting)</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<input type="text" id="alert_name" class="form-control" name="alert_name" value="{{ alert_name }}"
|
||||
onkeyup="return forceLower(this);">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Check Interval -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="interval">Check Interval: </label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#intervalModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="intervalModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="checkInterval">Check Interval</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>The check interval is how often the check will run the query (in seconds) and measure the
|
||||
results</p>
|
||||
<p>Anything less than 30 seconds will automatically be bumped up
|
||||
to 30 seconds. This is due to the fact that metrics are collected every 30 seconds, so
|
||||
checking more often than this would just result in the same values returned from the
|
||||
query
|
||||
as nothing would have changed yet</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<input type="number" id="interval" class="form-control" name="interval" value="{{ interval }}">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Upper Critical Threshold -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="criticalUpperThreshold">Upper Critical Threshold: </label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#criticalUpperThresholdModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="criticalUpperThresholdModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="criticalUpperThresholdTitle">Upper Critical Threshold</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This is a Floating Point or Int that when the results back from the query exceeds this
|
||||
number, a critical alert will trigger.</p>
|
||||
<p>Only Critical Alerts will also trigger emails and slack alerts (if set)</p>
|
||||
<p>Your query needs to be simplified down to just one or two
|
||||
values per grouping (A start and end metric). The alerting system will look at all
|
||||
values per grouping and check if any of the values are over the threshold to send out an
|
||||
alert</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-7">
|
||||
<input type="number" class="form-control" id="criticalUpperThreshold" name="critical_upper_threshold"
|
||||
value="{{ critical_upper_threshold }}"
|
||||
step="0.01"
|
||||
onkeypress="validate(event)">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Lower Critical Threshold -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="lower_criticalThreshold">Lower Critical Threshold: </label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#criticalLowerThresholdModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="criticalLowerThresholdModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="criticalLowerThresholdTitle">Lower Critical Threshold</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This is a Floating Point or Int that when the results back from the query drops below this
|
||||
number, a critical alert will trigger.</p>
|
||||
<p>Only Critical Alerts will also trigger emails and slack alerts (if set)</p>
|
||||
<p>Your query needs to be simplified down to just one or two
|
||||
values per grouping (A start and end metric). The alerting system will look at all
|
||||
values per grouping and check if any of the values are under the threshold to send out an
|
||||
alert</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-7">
|
||||
<input type="number" class="form-control" id="lower_criticalThreshold" name="critical_lower_threshold"
|
||||
value="{{ critical_lower_threshold }}"
|
||||
step="0.01"
|
||||
onkeypress="validate(event)">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Upper Warning Threshold -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="warning_upper_threshold">Upper Warning Threshold: </label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#warningUpperThresholdModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="warningUpperThresholdModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="warningUpperThresholdTitle">Upper Warning Threshold</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This is a Floating Point or Int that when the results back from the query exceeds this
|
||||
number, a warning alert will trigger.</p>
|
||||
<p>Warnings will not trigger Email or Slack alerts (if set)</p>
|
||||
<p>Your query needs to be simplified down to just one or two
|
||||
values per grouping (A start and end metric). The alerting system will look at all
|
||||
values per grouping and check if any of the values are over the threshold to send out an
|
||||
alert</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if warning_upper_threshold %}
|
||||
{% set warning_upper_checked='checked' %}
|
||||
{% else %}
|
||||
{% set warning_upper_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="warning_upper" id="warning_upper" aria-label="..." onclick="dynThreshold(this);" {{
|
||||
warning_upper_checked }}>
|
||||
</span>
|
||||
<input type="number" name="warning_upper_threshold" class="form-control" id="warning_upper_threshold"
|
||||
value="{{ warning_upper_threshold }}"
|
||||
aria-label="..." step="0.01" {{ warning_upper_disabled }}>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Lower Warning Threshold -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="warning_lower_threshold">Lower Warning Threshold: </label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#warningLowerThresholdModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="warningLowerThresholdModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="warningLowerThresholdTitle">Lower Warning Threshold</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This is a Floating Point or Int that when the results back from the query drops below this
|
||||
number, a warning alert will trigger.</p>
|
||||
<p>Warnings will not trigger Email or Slack alerts (if set)</p>
|
||||
<p>Your query needs to be simplified down to just one or two
|
||||
values per grouping (A start and end metric). The alerting system will look at all
|
||||
values per grouping and check if any of the values are under the threshold to send out an
|
||||
alert</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if warning_lower_threshold %}
|
||||
{% set warning_lower_checked='checked' %}
|
||||
{% else %}
|
||||
{% set warning_lower_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="warning_lower" id="warning_lower" aria-label="..." onclick="dynThreshold(this);" {{
|
||||
warning_lower_checked }}>
|
||||
</span>
|
||||
<input type="number" name="warning_lower_threshold" class="form-control" id="warning_lower_threshold"
|
||||
value="{{ warning_lower_threshold }}"
|
||||
aria-label="..." step="0.01" {{ warning_lower_disabled }}>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Occurrences -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="occurrences_threshold">Frequency: </label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#occurrencesModal">
|
||||
info
|
||||
</button>
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="occurrencesModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="occurrencesTitle">Frequency</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>The occurrences value, when set, will determine how many times the alert has to exceed the
|
||||
threshold in order for an alert to trigger.</p>
|
||||
<p>This is particularly useful for metrics that can be spikey and resolve quickly,
|
||||
using occurrences allows you to only be alerted when a spike is no longer spiking but
|
||||
maintaining the rate over the period of time</p>
|
||||
<p>This is compared once every interval, so if your alert is set to 5 minutes, with a
|
||||
occurrences of 3, you'd have to have the threshold exceeded for 15 minutes before any
|
||||
alerts
|
||||
are sent out.</p>
|
||||
<p>The occurrences value is optional, and if not enabled, the service assumes that after 1 query
|
||||
exceeding the threshold is enough to trigger alerts. So in this way having an occurrences value
|
||||
set
|
||||
to 1 or not enabled does the same thing.</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if occurrences_threshold and occurrences_threshold is number and occurrences_threshold > 1 %}
|
||||
{% set occurrences_checked='checked' %}
|
||||
{% else %}
|
||||
{% set occurrences_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="occurrences" id="occurrences" aria-label="..."
|
||||
onclick="dynThreshold(this);" {{
|
||||
occurrences_checked }}>
|
||||
</span>
|
||||
<input type="number" name="occurrences_threshold" class="form-control" id="occurrences_threshold"
|
||||
value="{{ occurrences_threshold }}"
|
||||
aria-label="..." step="1" min="2" {{ occurrences_disabled }} onkeyup="return forcePositive(this);">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Tags -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="tags">Tags:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#tagsModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="tagsModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="tagsTitle">Tags</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>A comma separated list of tags used to include in the alert subject</p>
|
||||
<p>In the event of an alert, the tags will be used to look up distinctive
|
||||
information and
|
||||
include as part of the alert</p>
|
||||
<p>For example including the dc tag in an alert means that if an alert occurs, the
|
||||
alerting
|
||||
system will look up the dc value from the returned query and include it as part
|
||||
of the
|
||||
alert subject</p>
|
||||
<p>These are the same tag values used to build KairosDB queries</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<input type="text" name="tags" id="tags" class="form-control" value="{{ tags }}"
|
||||
onkeyup="return forceComma(this);">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Notifications</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- VictorOps Alerts -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="vo">VictorOps Alert:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#voModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="voModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="voTitle">Victor Ops Alert List</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>A comma separated list of victorops routing keys</p>
|
||||
<p>In the event of an alert, the Ids listed here will receive a victorops alert</p>
|
||||
<p>If the checkbox isn't selected, when generating the .yaml config the values
|
||||
listed will
|
||||
be ignored</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if vo=="on" %}
|
||||
{% set vo_checked='checked' %}
|
||||
{% else %}
|
||||
{% set vo_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="vo" id="vo" aria-label="..." onclick="dynEnable(this);" {{ vo_checked
|
||||
}}>
|
||||
</span>
|
||||
<input type="text" class="form-control" name="vo_list" id="vo_list" aria-label="..."
|
||||
value="{{ vo_list }}" onkeyup="return forceComma(this);" {{ vo_disabled }}>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Email Alerts -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="email">Email Alert:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#emailModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="emailTitle">Email Alert List</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>A comma separated list of email names to send alerts to</p>
|
||||
<p>In the event of an alert, the names listed here will receive an email alert</p>
|
||||
<p>The alerting system appends an @qualtrics.com to the names listed here, so there
|
||||
is no
|
||||
need to include the @domain as it's assumed all alerting emails would go to a
|
||||
qualtrics
|
||||
address</p>
|
||||
<p>Also the SMTP server can only send to @qualtrics addresses anyways</p>
|
||||
<p>For example sending an email to both netops and devops on an alert would be <b>devops,netops</b>
|
||||
in the text box.</p>
|
||||
<p>If the checkbox isn't selected, when generating the .yaml config the values
|
||||
listed will
|
||||
be ignored</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if email=="on" %}
|
||||
{% set email_checked='checked' %}
|
||||
{% else %}
|
||||
{% set email_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="email" id="email" aria-label="..." onclick="dynEnable(this);" {{
|
||||
email_checked }}>
|
||||
</span>
|
||||
<input type="text" name="email_list" class="form-control" id="email_list"
|
||||
value="{{ email_list }}"
|
||||
aria-label="..." onkeyup="return forceComma(this);" {{ email_disabled }}>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Slack Alert List -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="slack">Slack Alert:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#slackModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="slackModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="slackTitle">Slack Alert List</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>A comma separated list of slack names to send alerts to</p>
|
||||
<p>In the event of an alert, the names listed here will receive a slack alert from a
|
||||
slackbot</p>
|
||||
<p>You must include a @ for direct message alerts and # for channel alerts</p>
|
||||
<p>For example, if the DevOps team wanted to get an alert in slack, the value in the
|
||||
text
|
||||
box would be <b>#devops</b>.
|
||||
If I wanted to also include a direct message as well then the value would be
|
||||
<b>#devops,@codyc</b></p>
|
||||
<p>Don't troll people with your metric alerts bombing people's slack, it's unkind</p>
|
||||
<p>If the checkbox isn't selected, when generating the .yaml config the values
|
||||
listed will
|
||||
be ignored</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
{% if slack=="on" %}
|
||||
{% set slack_checked='checked' %}
|
||||
{% else %}
|
||||
{% set slack_disabled='disabled' %}
|
||||
{% endif %}
|
||||
<input type="checkbox" name="slack" id="slack" aria-label="..." onclick="dynEnable(this);" {{
|
||||
slack_checked }}>
|
||||
</span>
|
||||
<span id="insertinputs_slack"></span>
|
||||
<input type="text" name="slack_list" class="form-control" id="slack_list"
|
||||
value="{{ slack_list }}"
|
||||
aria-label="..." onkeyup="return forceComma(this);" {{ slack_disabled }}>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Dashboard</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Dashboard URL -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="url">Dashboard URL:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#dashboardModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="dashboardModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="dashboardTitle">Dashboard URL</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>Most queries are built based on some dashboard already built in grafana</p>
|
||||
<p>By including the URL to that dashboard, the oncall engineer receiving the alert
|
||||
will be
|
||||
able to click the link in the alert and get a better picture of what this alert
|
||||
is
|
||||
and how it relates to the datacenter</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-7">
|
||||
<input type="text" name="url" id="url" class="form-control" value="{{ url }}">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Kairosdb Query</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- KairosDB Query -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="kairosdb_query">KairosDB Query:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#queryModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="queryModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="queryTitle">KairosDB Query</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>Paste in your KairosDB Query that you have already worked out.</p>
|
||||
<p>You can generate your query by going to the <a
|
||||
href="http://kairosdb-metrics.service.eng.consul:8080/" target="_blank">KairosDB
|
||||
UI
|
||||
in eng</a></p>
|
||||
<p>When generating your metric you will want to get the return values down to just 1
|
||||
or 2
|
||||
results per grouping. This can be done by sending the query to the MAX or MIN
|
||||
aggregators (depending on your logic needs) as the last aggregator in the
|
||||
query</p>
|
||||
<p>You will also want to include a time offset, typically 5 minutes is used for when
|
||||
to
|
||||
start (as from 5 minutes ago to now). Setting the MAX aggregator to this value
|
||||
is
|
||||
usually typical</p>
|
||||
<p>Once you have generated your query and it's returning the results you expect,
|
||||
click the
|
||||
<b>Show Query</b> button on the kairosDB UI and copy the results into this field
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-7">
|
||||
<textarea name="kairosdb_query" id="kairosdb_query" class="form-control" rows="12" cols="50">{{ kairosdb_query }}</textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Prometheus Query</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Prometheus URL -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label for="prometheus_url" class="control-label">Prometheus URL:</label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#prometheusurlModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="prometheusurlModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="myModalLabel">Prometheus URL</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>URL for the prometheus server</p>
|
||||
<p>Shared, production Prometheus URLs are currently:
|
||||
<ul>
|
||||
<li>http://big-trickster.service.eng.consul:9090</li>
|
||||
</ul>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<input type="text" id="prometheus_url" class="form-control" name="prometheus_url" value="{{ prometheus_url }}"
|
||||
onkeyup="return forceLower(this);">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Prometheus Query -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="prometheus_query">Prometheus Query:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#prometheusQueryModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="prometheusQueryModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="queryTitle">Prometheus Query</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>Paste in your Prometheus Query that you have already worked out.</p>
|
||||
<p>You can generate your query by going to the url of your prometheus endpoint. Eng Vis plans on adding a smart router for this in the future so all instances will be exposed via a single smart proxy, but for now you'll need to know the name. </p><p><a
|
||||
href="http://big-trickster.service.eng.consul:9090/graph" target="_blank">Prometheus Host Metrics
|
||||
UI
|
||||
in eng</a>
|
||||
</p><p>
|
||||
<a
|
||||
href="http://big-trickster.service.eng.consul:9090/graph" target="_blank">Prometheus StatsD and other Metrics
|
||||
UI
|
||||
in eng</a></p>
|
||||
<p>When creating a query, keep in mind a single value returned is gonna be the most
|
||||
useful
|
||||
, so stuff like "topk(1,yourmetrics)" are gonna be good choices. However, if
|
||||
your query has multiple return values AOM will use last value.</p>
|
||||
<p>So if you use a step/duration of 60 and a timespan of 300 between start
|
||||
and
|
||||
end you'll get back 5 values and the last will be used.
|
||||
</p>
|
||||
<p><a href="https://prometheus.io/docs/prometheus/latest/querying/functions/" target="_blank">Prometheus Functions</a></p>
|
||||
<p>
|
||||
<a href="https://prometheus.io/docs/prometheus/latest/querying/operators/" target="_blank">Prometheus Operators</a>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-7">
|
||||
<textarea name="prometheus_query" id="prometheus_query" class="form-control" rows="12" cols="50">{{ prometheus_query }}</textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Start Time -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="start_time">Start Time: </label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#startTimeModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="startTimeModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="startTime">Start Time</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This should be a relative time in seconds like '-600' for 10m, defaults to '-300'</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<input type="text" id="start_time" class="form-control" name="start_time" value="{{ start_time }}">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- End Time -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="end_time">End Time: </label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#endTimeModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="endTimeModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="endTime">End Time</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>This can be 'now' (default) or some relative offset like '-30' in seconds</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<input type="text" id="end_time" class="form-control" name="end_time" value="{{ end_time }}">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h3 class="text-center">Actions</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Load Config File -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="loadFile">Load Config From File:</label>
|
||||
</div>
|
||||
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#loadModal">info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="loadModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="loadTitle">Load Config from file</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>Load a config already generated to file into the UI</p>
|
||||
|
||||
<p>This is handy when you need to make minor changes to a query, or add additional
|
||||
alerting
|
||||
values or change thresholds. Or if you are just terrified of yaml.</p>
|
||||
<p>Hit the drop down to see a list of all alert configs (the names generated from
|
||||
the values
|
||||
used in the Alert Name field). Hit the Go button and the config will load into all the
|
||||
fields</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-7">
|
||||
<div class="input-group">
|
||||
<select name="loadFile" id="loadFile" class="form-control">
|
||||
<option value="" selected></option>
|
||||
{% for f in alert_list %}
|
||||
<option value="{{ f }}">{{ f }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<span class="input-group-btn">
|
||||
<input type="submit" name="generate" id="submitFiles" class="btn btn-primary" value="Go">
|
||||
</span>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Submit Form -->
|
||||
<div class="form-group">
|
||||
<div class="col-sm-4">
|
||||
<label class="control-label" for="submit">Generate YAML:</label>
|
||||
</div>
|
||||
<div class="col-sm-1">
|
||||
<!-- Button trigger modal -->
|
||||
<button type="button" class="btn btn-info btn-xs" data-toggle="modal" data-target="#generateModal">
|
||||
info
|
||||
</button>
|
||||
|
||||
<!-- Modal -->
|
||||
<div class="modal fade" id="generateModal" tabindex="-1" role="dialog"
|
||||
aria-labelledby="myModalLabel">
|
||||
<div class="modal-dialog" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="generateTitle">Generate Alert Config</h4>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<p>When you are ready to take the values in the form and generate an alert config
|
||||
.yaml file,
|
||||
hit the button</p>
|
||||
<p>This will generate a .yaml file based on the alert name. So for example if one
|
||||
was to
|
||||
have the value <b>mcp_errors_per_dc</b> as an alert name, the resulting file
|
||||
would be
|
||||
<b>mcp_errors_per_dc.yaml</b></p>
|
||||
<p>This <b>will</b> overwrite a .yaml file if the alert name is the same as an
|
||||
already
|
||||
existing file</p>
|
||||
<p>If there are any errors generating the config, the resulting page will include
|
||||
the error
|
||||
message and give you the ability to return back to this page with your form
|
||||
saved</p>
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-7">
|
||||
<input type="submit" id="submit" name='generate' class='btn btn-primary' value="generate"
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</form>
|
||||
{% endblock %}
|
||||
4
AoM_Service/AoM_Configs/webapp/templates/layout.html
Executable file
4
AoM_Service/AoM_Configs/webapp/templates/layout.html
Executable file
@@ -0,0 +1,4 @@
|
||||
{% extends "header.html" %}
|
||||
{% block body %}
|
||||
<h2>Complete all values in the form below</h2>
|
||||
{% endblock %}
|
||||
69
AoM_Service/AoM_Configs/webapp/views.py
Executable file
69
AoM_Service/AoM_Configs/webapp/views.py
Executable file
@@ -0,0 +1,69 @@
|
||||
# views.py
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
|
||||
import yaml
|
||||
from flask import session
|
||||
|
||||
from library.logger import AlertLogging
|
||||
from webapp import app, render_template, request, render
|
||||
|
||||
# Module-level logger shared by every view in this file; it is created and
# started as a side effect of importing the module.
# NOTE(review): start_log_file presumably attaches a file handler writing to
# logs/aom_service.log relative to the working directory -- confirm in
# library.logger.
logger = AlertLogging('aom')
|
||||
logger.start()
|
||||
logger.start_log_file("logs/aom_service.log")
|
||||
|
||||
|
||||
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the alert-config form, or process a submitted one.

    POST:
        * If the ``generate`` form field contains ``go`` (the "Go" button of
          the "Load Config From File" row), delegate to ``re_build`` with the
          selected ``loadFile`` value.
        * Otherwise copy every form field into a dict and pass it to
          ``render.render_config``.  A zero return code renders
          ``debug.html`` with the generated file contents; a non-zero code
          stores the form in ``session['yaml_config']`` and renders
          ``error.html``.  Any exception renders ``error.html`` with the
          exception text.

    Any other method (GET, and the HEAD requests Flask routes here
    automatically): render ``index.html`` with the sorted names of the
    ``*.yaml`` files in the configured ``alert_folder``, pre-filled from
    ``session['yaml_config']`` when a previous submission stored one.

    Returns:
        The rendered template response.
    """
    logger.debug("Request Method: {}".format(request.method))

    if request.method == 'POST':
        logger.info("Got a form")
        if 'go' in request.form['generate'].lower():
            return re_build(request.form['loadFile'])

        yaml_config = dict(request.form.items())
        ret = ''
        try:
            code, ret = render.render_config(yaml_config)
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would report a failed render as a success.
            if code != 0:
                # Keep the submitted values so the form can be re-populated.
                session['yaml_config'] = json.dumps(yaml_config)
                return render_template(
                    'error.html',
                    message="Failed to render to file: {}".format(ret))
            return render_template(
                'debug.html', query=yaml_config,
                file_path='alert_configs/{}.yaml'.format(yaml_config['alert_name']),
                file_contents=ret.split('\n'))
        except Exception as e:
            return render_template('error.html', message=str(e))

    # Everything that is not a POST shows the form.  Handling this as the
    # fall-through (rather than `== 'GET'`) keeps HEAD requests from
    # returning None, which Flask turns into a 500.
    # GET BLOB OF FILES
    # safe_load: service.yaml is plain configuration, and yaml.load() without
    # an explicit Loader is rejected by PyYAML >= 6.  The context manager
    # closes the file handle the original left open.
    with open('service.yaml', 'r') as config_file:
        service_config = yaml.safe_load(config_file)
    alert_list = sorted(
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(service_config['alert_folder'] + "/*.yaml"))

    if 'yaml_config' in session:
        return render_template('index.html', **json.loads(session['yaml_config']),
                               alert_list=alert_list)
    return render_template('index.html', alert_list=alert_list)
|
||||
|
||||
|
||||
@app.route('/build/<alert_id>', methods=['POST'])
def re_build(alert_id):
    """Render the form pre-filled from an existing alert config.

    Args:
        alert_id: Identifier of the alert config; passed unchanged to
            ``render.render_yaml``, whose result is unpacked as template
            keyword arguments.

    Returns:
        The rendered ``index.html`` response, including the sorted names of
        the ``*.yaml`` files in the configured ``alert_folder``.
    """
    # READ IN CONFIG FROM ID
    # NOTE(review): alert_id comes straight from the URL / form; confirm that
    # render.render_yaml rejects path separators before touching the disk.
    config = render.render_yaml(alert_id)

    # safe_load: service.yaml is plain configuration, and yaml.load() without
    # an explicit Loader is rejected by PyYAML >= 6.  The context manager
    # closes the file handle the original left open.
    with open('service.yaml', 'r') as config_file:
        service_config = yaml.safe_load(config_file)
    alert_list = sorted(
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(service_config['alert_folder'] + "/*.yaml"))

    return render_template('index.html', **config, alert_list=alert_list)
|
||||
|
||||
|
||||
@app.route("/debug/")
def toggle_debug():
    """Flip debug logging on or off, then show the main page.

    The current state is read from ``logger.debug_handler``: a truthy value
    means debug logging is active and gets stopped, otherwise it is started.

    Returns:
        Whatever ``index()`` returns for the current request.
    """
    debug_active = logger.debug_handler
    if not debug_active:
        logger.start_debug()
        logger.debug("Debug Started")
    else:
        logger.stop_debug()
        logger.info("Debug Stopped")
    return index()
|
||||
3
AoM_Service/AoM_Configs/webapp_requirements.txt
Executable file
3
AoM_Service/AoM_Configs/webapp_requirements.txt
Executable file
@@ -0,0 +1,3 @@
|
||||
requests
|
||||
pyaml
|
||||
flask
|
||||
Reference in New Issue
Block a user