More container alerts
This commit is contained in:
@@ -150,6 +150,39 @@ groups:
|
|||||||
description: "The container update metric indicates a failure. Check logs for details."
|
description: "The container update metric indicates a failure. Check logs for details."
|
||||||
source: 'https://monitor.pukeko.xyz'
|
source: 'https://monitor.pukeko.xyz'
|
||||||
|
|
||||||
|
# ContainerFailure: fires when a container has not been seen for over an hour.
#
# The previous expression (`container_last_seen == 0`) compared a last-seen
# timestamp against zero, which a live or recently-seen container never
# reports, so the alert could not fire. The age of the timestamp is compared
# against one hour instead, matching the description below.
# NOTE(review): assumes container_last_seen is the cAdvisor gauge holding a
# Unix timestamp in seconds — confirm against the exporter in use.
# NOTE(review): if the exporter stops exposing the series for removed
# containers sooner than one hour, lower the threshold accordingly — confirm.
- alert: ContainerFailure
  expr: time() - container_last_seen > 3600
  # The one-hour window is already encoded in the expression; an additional
  # pending period would delay the alert to two hours.
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: "Container failure on {{ $labels.instance }}"
    description: "No data received from a container for the last hour on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
|
||||||
|
|
||||||
|
# ContainerRestartRate: fires when containers restart faster than 0.2 times
# per minute (about one restart every five minutes), sustained for 10 minutes.
#
# rate() yields a per-second average, so the result is multiplied by 60 to
# express it per minute, the unit the description states. Without the
# scaling the threshold was 0.2 restarts per *second*, which is practically
# unreachable.
- alert: ContainerRestartRate
  expr: rate(container_restart_total[5m]) * 60 > 0.2
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: "High container restart rate on {{ $labels.instance }}"
    description: "Container restart rate is above 0.2 restarts per minute on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
|
||||||
|
|
||||||
|
# ContainerHighPacketLoss: warns when a container's network receive-error
# counter grows faster than 0.1 errors per second, sustained for 10 minutes.
#
# rate() yields a per-second average, so the threshold below is in errors per
# second. The description previously said "per minute", which understated the
# real threshold by a factor of 60; only the text is corrected, the firing
# condition is unchanged.
# NOTE(review): the metric counts receive errors, not dropped packets —
# confirm this is the intended signal for "packet loss".
- alert: ContainerHighPacketLoss
  expr: rate(container_network_receive_errors_total[5m]) > 0.1
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: "High packet loss on container network ({{ $labels.name }})"
    description: "Packet loss rate is above 0.1 errors per second on {{ $labels.name }}"
    source: 'https://monitor.pukeko.xyz'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
- name: Backups
|
- name: Backups
|
||||||
rules:
|
rules:
|
||||||
|
|||||||
Reference in New Issue
Block a user