More container alerts
This commit is contained in:
@@ -150,6 +150,39 @@ groups:
|
||||
description: "The container update metric indicates a failure. Check logs for details."
|
||||
source: 'https://monitor.pukeko.xyz'
|
||||
|
||||
- alert: ContainerFailure
  # Fires when a container has not been seen by the exporter recently.
  # container_last_seen holds the Unix timestamp at which the container was
  # last seen, so comparing it to 0 never matches; alert on the age of that
  # timestamp instead.
  # NOTE(review): assumes the metric comes from cAdvisor, which stops exporting
  # a vanished container's series after a short while. A long `for:` hold would
  # therefore never be satisfied. Confirm threshold/hold for the exporter in use.
  expr: time() - container_last_seen > 60
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: "Container failure on {{ $labels.instance }}"
    description: "A container has not been seen for more than 60 seconds on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
|
||||
|
||||
- alert: ContainerRestartRate
  # Fires when containers restart too frequently.
  # rate() yields a per-second value; scale by 60 so the 0.2 threshold is
  # expressed in restarts per minute, as the description states. Without the
  # scaling the threshold would mean 12 restarts per minute.
  expr: rate(container_restart_total[5m]) * 60 > 0.2
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: "High container restart rate on {{ $labels.instance }}"
    description: "Container restart rate is above 0.2 restarts per minute on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
|
||||
|
||||
- alert: ContainerHighPacketLoss
  # Fires on a sustained rate of network receive errors for a container.
  # rate() yields errors per second (averaged over the 5m window), so the
  # threshold below is 0.1 errors per second; the description states the
  # same unit.
  expr: rate(container_network_receive_errors_total[5m]) > 0.1
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: "High packet loss on container network ({{ $labels.name }})"
    description: "Packet loss rate is above 0.1 errors per second on {{ $labels.name }}"
    source: 'https://monitor.pukeko.xyz'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
- name: Backups
|
||||
rules:
|
||||
|
||||
Reference in New Issue
Block a user