Commit 9bc37663 authored by Hilmar Magnusson's avatar Hilmar Magnusson

new minimal

parent 39e24880
MIT License
Copyright (c) 2016 Stefan Prodan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
This diff is collapsed.
# Alertmanager configuration: every alert is routed to a single Slack receiver.
route:
  receiver: 'slack'

receivers:
  - name: 'slack'
    slack_configs:
      - send_resolved: true
        # Message body is the description annotation shared by the alert group.
        text: "{{ .CommonAnnotations.description }}"
        username: 'Prometheus'
        # Replace both placeholders with your own channel and webhook.
        channel: '#<channel-name>'
        api_url: 'https://hooks.slack.com/services/<webhook-id>'
# Prometheus UI: password-protected reverse proxy on port 9090.
:9090 {
    tls off
    errors stderr

    # Credentials are read from the container environment.
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}

    proxy / prometheus:9090 {
        transparent
    }
}
# Alertmanager UI: password-protected reverse proxy on port 9093.
:9093 {
    tls off
    errors stderr

    # Credentials are read from the container environment.
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}

    proxy / alertmanager:9093 {
        transparent
    }
}
# Pushgateway: password-protected reverse proxy on port 9091.
:9091 {
    tls off
    errors stderr

    # Credentials are read from the container environment.
    basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD}

    proxy / pushgateway:9091 {
        transparent
    }
}
# Grafana: reverse proxy on port 3000. No basicauth directive here, unlike the
# other sites in this Caddyfile.
:3000 {
    tls off
    errors stderr

    proxy / grafana:3000 {
        websocket
        transparent
    }
}
\ No newline at end of file
# Minimal monitoring agents: node-exporter and cAdvisor, both on the host network.
version: '2.1'

services:

  nodeexporter:
    image: prom/node-exporter:v0.18.1
    container_name: nodeexporter
    volumes:
      # Host filesystems are mounted read-only so the exporter can read host metrics.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # '$$' is Compose escaping for a literal '$' in the regex.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped
    network_mode: host
    labels:
      org.label-schema.group: "monitoring"

  cadvisor:
    image: google/cadvisor:v0.33.0
    container_name: cadvisor
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /cgroup:/cgroup:ro
    restart: unless-stopped
    network_mode: host
    labels:
      org.label-schema.group: "monitoring"
......@@ -24,24 +24,8 @@ services:
- '--storage.tsdb.retention.time=200h'
- '--web.enable-lifecycle'
restart: unless-stopped
expose:
- 9090
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
alertmanager:
image: prom/alertmanager:v0.19.0
container_name: alertmanager
volumes:
- ./alertmanager/:/etc/alertmanager/
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
restart: unless-stopped
expose:
- 9093
ports:
- "9090:9090"
networks:
- monitor-net
labels:
......@@ -54,31 +38,16 @@ services:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- ./mydata:/etc/nodeexporter/mydata/
command:
- '--path.procfs=/host/proc'
- '--path.rootfs=/rootfs'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
- '--collector.textfile.directory=/etc/nodeexporter/mydata'
restart: unless-stopped
expose:
- 9100
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
cadvisor:
image: google/cadvisor:v0.33.0
container_name: cadvisor
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
#- /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux
restart: unless-stopped
expose:
- 8080
ports:
- "9100:9100"
networks:
- monitor-net
labels:
......@@ -98,39 +67,10 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
restart: unless-stopped
expose:
- 3000
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
pushgateway:
image: prom/pushgateway:v0.9.1
container_name: pushgateway
restart: unless-stopped
expose:
- 9091
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
caddy:
image: stefanprodan/caddy
container_name: caddy
ports:
- "3000:3000"
- "9090:9090"
- "9093:9093"
- "9091:9091"
volumes:
- ./caddy/:/etc/caddy/
environment:
- ADMIN_USER=${ADMIN_USER:-admin}
- ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
restart: unless-stopped
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -33,7 +33,7 @@ wait_for_api() {
while ! grafana_api GET /api/user/preferences
do
sleep 5
done
done
}
install_datasources() {
......@@ -52,31 +52,13 @@ install_datasources() {
done
}
# Install every *.json dashboard found in DASHBOARDS_PATH through the Grafana API.
#
# Globals:
#   DASHBOARDS_PATH - directory that holds the dashboard JSON files.
# Uses grafana_api (defined elsewhere in this script) to POST each file to
# /api/dashboards/db and prints one success/failure line per dashboard.
install_dashboards() {
  local dashboard

  # The directory is quoted so a path containing whitespace is not word-split;
  # the *.json glob stays outside the quotes so it still expands.
  for dashboard in "${DASHBOARDS_PATH}"/*.json
  do
    # When nothing matches, the pattern is left unexpanded (unless nullglob is
    # set); the -f test skips that literal string.
    if [[ -f "${dashboard}" ]]; then
      echo "Installing dashboard ${dashboard}"
      if grafana_api POST /api/dashboards/db "" "${dashboard}"; then
        echo "installed ok"
      else
        echo "install failed"
      fi
    fi
  done
}
# Provision Grafana in three ordered steps.
configure_grafana() {
  # 1. Wait for the Grafana HTTP API.
  wait_for_api

  # 2. Register the data sources.
  install_datasources

  # 3. Upload the dashboards.
  install_dashboards
}
echo "Running configure_grafana in the background..."
configure_grafana &
/run.sh
exit 0
\ No newline at end of file
exit 0
# Prometheus on EC2 & ECS:
Some helpers for anyone configuring Prometheus on ECS and AWS EC2.
To get started on AWS ECS and EC2:
*For EC2/ECS nodes*:
- Import the ECS task definitions for cAdvisor and node-exporter, create a service/task from each, and run them on every host you want to monitor
- Any host that has the tag "Monitoring: On" will be automatically added to the scrape targets
- Expose ports 9100 and 9191 to your Prometheus host
*For Prometheus host*:
- Copy the prometheus.yml configuration provided here into your base Prometheus configuration to enable EC2 service discovery
- `docker compose up -d`
**Note**:
Set `query.staleness-delta` to `1m` to make metrics more real-time
### TODO
- Add alerting rules based on ECS
{
  "family": "cadvisor",
  "containerDefinitions": [
    {
      "name": "cadvisor",
      "image": "google/cadvisor",
      "cpu": 10,
      "memory": 300,
      "portMappings": [
        {
          "containerPort": 8080,
          "hostPort": 9191
        }
      ],
      "essential": true,
      "privileged": true,
      "mountPoints": [
        {
          "sourceVolume": "root",
          "containerPath": "/rootfs",
          "readOnly": true
        },
        {
          "sourceVolume": "var_run",
          "containerPath": "/var/run",
          "readOnly": false
        },
        {
          "sourceVolume": "sys",
          "containerPath": "/sys",
          "readOnly": true
        },
        {
          "sourceVolume": "var_lib_docker",
          "containerPath": "/var/lib/docker",
          "readOnly": true
        },
        {
          "sourceVolume": "cgroup",
          "containerPath": "/cgroup",
          "readOnly": true
        }
      ]
    }
  ],
  "volumes": [
    {
      "name": "root",
      "host": {
        "sourcePath": "/"
      }
    },
    {
      "name": "var_run",
      "host": {
        "sourcePath": "/var/run"
      }
    },
    {
      "name": "sys",
      "host": {
        "sourcePath": "/sys"
      }
    },
    {
      "name": "var_lib_docker",
      "host": {
        "sourcePath": "/var/lib/docker/"
      }
    },
    {
      "name": "cgroup",
      "host": {
        "sourcePath": "/cgroup"
      }
    }
  ]
}
\ No newline at end of file
{
  "family": "prometheus",
  "networkMode": "host",
  "volumes": [],
  "containerDefinitions": [
    {
      "name": "node_exporter",
      "image": "prom/node-exporter",
      "essential": true,
      "privileged": null,
      "cpu": 0,
      "memoryReservation": 150,
      "portMappings": [
        {
          "protocol": "tcp",
          "containerPort": 9100,
          "hostPort": 9100
        }
      ]
    }
  ]
}
global:
  scrape_interval: 15s
  evaluation_interval: 15s

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'docker-host-alpha'

# Load and evaluate the rules in these files every 'evaluation_interval'.
rule_files:
  - "targets.rules"
  - "hosts.rules"
  - "containers.rules"

# Scrape configurations: static targets first, then EC2 service discovery.
scrape_configs:
  - job_name: 'nodeexporter'
    scrape_interval: 5s
    static_configs:
      - targets: ['nodeexporter:9100']

  - job_name: 'cadvisor'
    scrape_interval: 5s
    static_configs:
      - targets: ['cadvisor:8080']

  - job_name: 'prometheus'
    scrape_interval: 10s
    static_configs:
      - targets: ['localhost:9090']

  # Sample scrape configurations for AWS EC2 service discovery.
  # Job names must be unique within one Prometheus configuration, so the EC2
  # jobs carry an 'ec2-' prefix to avoid clashing with the static jobs above.
  - job_name: 'ec2-nodeexporter'
    ec2_sd_configs:
      - region: us-east-1
        port: 9100
    relabel_configs:
      # Only monitor instances whose "Monitoring" tag is set to "On".
      - source_labels: [__meta_ec2_tag_Monitoring]
        regex: 'On'
        action: keep

  - job_name: 'ec2-cadvisor'
    ec2_sd_configs:
      - region: us-east-1
        # Host port published by the cadvisor ECS task definition.
        port: 9191
    relabel_configs:
      # Only monitor instances whose "Monitoring" tag is set to "On".
      - source_labels: [__meta_ec2_tag_Monitoring]
        regex: 'On'
        action: keep
port1_openflow{type="inDiscards"} 3355
port1_openflow{type="inFcsErrors"} 18446744073709551615
port1_openflow{type="inOctets"} 261643
port1_openflow{type="inUnicastPkts"} 3399
port1_openflow{type="outErrors"} 18446744073709551615
port1_openflow{type="outOctets"} 293388
port1_openflow{type="outUnicastPkts"} 3376
port10_openflow{type="inFcsErrors"} 18446744073709551615
port10_openflow{type="outErrors"} 18446744073709551615
port11_openflow{type="inFcsErrors"} 18446744073709551615
port11_openflow{type="outErrors"} 18446744073709551615
port12_openflow{type="inFcsErrors"} 18446744073709551615
port12_openflow{type="outErrors"} 18446744073709551615
port13_openflow{type="inFcsErrors"} 18446744073709551615
port13_openflow{type="outErrors"} 18446744073709551615
port14_openflow{type="inFcsErrors"} 18446744073709551615
port14_openflow{type="outErrors"} 18446744073709551615
port15_openflow{type="inFcsErrors"} 18446744073709551615
port15_openflow{type="outErrors"} 18446744073709551615
port16_openflow{type="inFcsErrors"} 18446744073709551615
port16_openflow{type="outErrors"} 18446744073709551615
port17_openflow{type="inFcsErrors"} 18446744073709551615
port17_openflow{type="outErrors"} 18446744073709551615
port17_openflow{type="outOctets"} 235820
port17_openflow{type="outUnicastPkts"} 2683
port18_openflow{type="inFcsErrors"} 18446744073709551615
port18_openflow{type="outErrors"} 18446744073709551615
port18_openflow{type="outOctets"} 226946
port18_openflow{type="outUnicastPkts"} 2582
port19_openflow{type="inFcsErrors"} 18446744073709551615
port19_openflow{type="outErrors"} 18446744073709551615
port2_openflow{type="inDiscards"} 3355
port2_openflow{type="inFcsErrors"} 18446744073709551615
port2_openflow{type="inOctets"} 295233
port2_openflow{type="inUnicastPkts"} 3399
port2_openflow{type="outErrors"} 18446744073709551615
port2_openflow{type="outOctets"} 293475
port2_openflow{type="outUnicastPkts"} 3377
port20_openflow{type="inFcsErrors"} 18446744073709551615
port20_openflow{type="outErrors"} 18446744073709551615
port21_openflow{type="inFcsErrors"} 18446744073709551615
port21_openflow{type="outErrors"} 18446744073709551615
port22_openflow{type="inFcsErrors"} 18446744073709551615
port22_openflow{type="outErrors"} 18446744073709551615
port23_openflow{type="inFcsErrors"} 18446744073709551615
port23_openflow{type="outErrors"} 18446744073709551615
port24_openflow{type="inFcsErrors"} 18446744073709551615
port24_openflow{type="outErrors"} 18446744073709551615
port25_openflow{type="inFcsErrors"} 18446744073709551615
port25_openflow{type="outErrors"} 18446744073709551615
port26_openflow{type="inFcsErrors"} 18446744073709551615
port26_openflow{type="outErrors"} 18446744073709551615
port27_openflow{type="inFcsErrors"} 18446744073709551615
port27_openflow{type="outErrors"} 18446744073709551615
port28_openflow{type="inFcsErrors"} 18446744073709551615
port28_openflow{type="outErrors"} 18446744073709551615
port29_openflow{type="inFcsErrors"} 18446744073709551615
port29_openflow{type="outErrors"} 18446744073709551615
port3_openflow{type="inDiscards"} 3355
port3_openflow{type="inFcsErrors"} 18446744073709551615
port3_openflow{type="inOctets"} 295159
port3_openflow{type="inUnicastPkts"} 3398
port3_openflow{type="outErrors"} 18446744073709551615
port3_openflow{type="outOctets"} 293388
port3_openflow{type="outUnicastPkts"} 3376
port30_openflow{type="inFcsErrors"} 18446744073709551615
port30_openflow{type="outErrors"} 18446744073709551615
port31_openflow{type="inFcsErrors"} 18446744073709551615
port31_openflow{type="outErrors"} 18446744073709551615
port32_openflow{type="inFcsErrors"} 18446744073709551615
port32_openflow{type="outErrors"} 18446744073709551615
port33_openflow{type="inFcsErrors"} 18446744073709551615
port33_openflow{type="outErrors"} 18446744073709551615
port34_openflow{type="inFcsErrors"} 18446744073709551615
port34_openflow{type="outErrors"} 18446744073709551615
port35_openflow{type="inFcsErrors"} 18446744073709551615
port35_openflow{type="outErrors"} 18446744073709551615
port36_openflow{type="inFcsErrors"} 18446744073709551615
port36_openflow{type="outErrors"} 18446744073709551615
port37_openflow{type="inFcsErrors"} 18446744073709551615
port37_openflow{type="outErrors"} 18446744073709551615
port38_openflow{type="inFcsErrors"} 18446744073709551615
port38_openflow{type="outErrors"} 18446744073709551615
port39_openflow{type="inFcsErrors"} 18446744073709551615
port39_openflow{type="outErrors"} 18446744073709551615
port4_openflow{type="inDiscards"} 3355
port4_openflow{type="inFcsErrors"} 18446744073709551615
port4_openflow{type="inOctets"} 295159
port4_openflow{type="inUnicastPkts"} 3398
port4_openflow{type="outErrors"} 18446744073709551615
port4_openflow{type="outOctets"} 293549
port4_openflow{type="outUnicastPkts"} 3378
port40_openflow{type="inFcsErrors"} 18446744073709551615
port40_openflow{type="outErrors"} 18446744073709551615
port41_openflow{type="inFcsErrors"} 18446744073709551615
port41_openflow{type="outErrors"} 18446744073709551615
port42_openflow{type="inFcsErrors"} 18446744073709551615
port42_openflow{type="outErrors"} 18446744073709551615
port43_openflow{type="inFcsErrors"} 18446744073709551615
port43_openflow{type="outErrors"} 18446744073709551615
port44_openflow{type="inFcsErrors"} 18446744073709551615
port44_openflow{type="outErrors"} 18446744073709551615
port45_openflow{type="inFcsErrors"} 18446744073709551615
port45_openflow{type="outErrors"} 18446744073709551615
port46_openflow{type="inFcsErrors"} 18446744073709551615
port46_openflow{type="outErrors"} 18446744073709551615
port47_openflow{type="inFcsErrors"} 18446744073709551615
port47_openflow{type="outErrors"} 18446744073709551615
port48_openflow{type="inFcsErrors"} 18446744073709551615
port48_openflow{type="outErrors"} 18446744073709551615
port49_openflow{type="inFcsErrors"} 18446744073709551615
port49_openflow{type="outErrors"} 18446744073709551615
port5_openflow{type="inDiscards"} 3355
port5_openflow{type="inFcsErrors"} 18446744073709551615
port5_openflow{type="inOctets"} 295233
port5_openflow{type="inUnicastPkts"} 3399
port5_openflow{type="outErrors"} 18446744073709551615
port5_openflow{type="outOctets"} 293462
port5_openflow{type="outUnicastPkts"} 3377
port50_openflow{type="inFcsErrors"} 18446744073709551615
port50_openflow{type="outErrors"} 18446744073709551615
port51_openflow{type="inFcsErrors"} 18446744073709551615
port51_openflow{type="outErrors"} 18446744073709551615
port52_openflow{type="inFcsErrors"} 18446744073709551615
port52_openflow{type="outErrors"} 18446744073709551615
port53_openflow{type="inDiscards"} 3332
port53_openflow{type="inFcsErrors"} 18446744073709551615
port53_openflow{type="inOctets"} 295288
port53_openflow{type="inUnicastPkts"} 3360
port53_openflow{type="outErrors"} 18446744073709551615
port53_openflow{type="outOctets"} 296720
port53_openflow{type="outUnicastPkts"} 3376
port54_openflow{type="inFcsErrors"} 18446744073709551615
port54_openflow{type="outErrors"} 18446744073709551615
port6_openflow{type="inDiscards"} 3355
port6_openflow{type="inFcsErrors"} 18446744073709551615
port6_openflow{type="inOctets"} 295159
port6_openflow{type="inUnicastPkts"} 3398
port6_openflow{type="outErrors"} 18446744073709551615
port6_openflow{type="outOctets"} 293388
port6_openflow{type="outUnicastPkts"} 3376
port7_openflow{type="inDiscards"} 3355
port7_openflow{type="inFcsErrors"} 18446744073709551615
port7_openflow{type="inOctets"} 295159
port7_openflow{type="inUnicastPkts"} 3398
port7_openflow{type="outErrors"} 18446744073709551615
port7_openflow{type="outOctets"} 293388
port7_openflow{type="outUnicastPkts"} 3376
port8_openflow{type="inDiscards"} 3355
port8_openflow{type="inFcsErrors"} 18446744073709551615
port8_openflow{type="inOctets"} 295159
port8_openflow{type="inUnicastPkts"} 3398
port8_openflow{type="outErrors"} 18446744073709551615
port8_openflow{type="outOctets"} 293462
port8_openflow{type="outUnicastPkts"} 3377
port9_openflow{type="inFcsErrors"} 18446744073709551615
port9_openflow{type="outErrors"} 18446744073709551615
openflow_calls_total{method="post",code="200"} 1337
aa_number{port="port53" ,ofagent="1338" } 1234
......@@ -18,36 +18,7 @@ scrape_configs:
static_configs:
- targets: ['nodeexporter:9100']
- job_name: 'cadvisor'
scrape_interval: 5s
static_configs:
- targets: ['cadvisor:8080']
- job_name: 'prometheus'
scrape_interval: 10s
static_configs:
- targets: ['localhost:9090']
- job_name: 'pushgateway'
scrape_interval: 10s
honor_labels: true
static_configs:
- targets: ['pushgateway:9091']
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- 'alertmanager:9093'
# - job_name: 'nginx'
# scrape_interval: 10s
# static_configs:
# - targets: ['nginxexporter:9113']
# - job_name: 'aspnetcore'
# scrape_interval: 10s
# static_configs:
# - targets: ['eventlog-proxy:5000', 'eventlog:5000']
This diff was suppressed by a .gitattributes entry.
This diff was suppressed by a .gitattributes entry.
This diff was suppressed by a .gitattributes entry.
This diff was suppressed by a .gitattributes entry.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment