Le monitoring est essentiel pour assurer la disponibilité, la performance et la fiabilité des systèmes informatiques. Il permet de détecter proactivement les problèmes et d'optimiser les ressources.
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: ['alertmanager:9093']
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'node'
static_configs:
- targets: ['node-exporter:9100']
- job_name: 'my-app'
static_configs:
- targets: ['app:8080']
# CPU usage moyen
100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
# Mémoire disponible
node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100
# Requêtes HTTP par seconde
rate(http_requests_total[5m])
# Temps de réponse p95
histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))
# datasource.yml
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
- name: Loki
type: loki
access: proxy
url: http://loki:3100
# alerts.yml
groups:
- name: system_alerts
interval: 30s
rules:
- alert: HighCPU
expr: 100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "CPU élevé sur {{ $labels.instance }}"
- alert: LowMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 5m
labels:
severity: critical
annotations:
summary: "Mémoire faible"
- alert: ServiceDown
expr: up == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Service down"
# alertmanager.yml
global:
smtp_smarthost: 'smtp.example.com:587'
smtp_from: 'alertmanager@example.com'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 12h
receiver: 'team-emails'
receivers:
- name: 'team-emails'
email_configs:
- to: 'team@example.com'
# docker-compose.yml
version: '3.8'
services:
prometheus:
image: prom/prometheus:latest
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- prometheus-data:/prometheus
ports:
- "9090:9090"
grafana:
image: grafana/grafana:latest
volumes:
- grafana-data:/var/lib/grafana
ports:
- "3000:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
node-exporter:
image: prom/node-exporter:latest
ports:
- "9100:9100"
alertmanager:
image: prom/alertmanager:latest
volumes:
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
ports:
- "9093:9093"
volumes:
prometheus-data:
grafana-data: