More container alerts

This commit is contained in:
2023-11-17 17:01:38 +02:00
parent 3d90e52a15
commit d6e22b8c6b

View File

@@ -150,6 +150,39 @@ groups:
description: "The container update metric indicates a failure. Check logs for details." description: "The container update metric indicates a failure. Check logs for details."
source: 'https://monitor.pukeko.xyz' source: 'https://monitor.pukeko.xyz'
- alert: ContainerFailure
  # Fires when a container has not been seen for more than a minute, and that
  # condition has held for a full hour.
  # container_last_seen is exported by cAdvisor as the Unix timestamp of the
  # last time the container was observed, so it is never 0; the previous
  # `container_last_seen == 0` comparison could not match. Compare the age of
  # the timestamp against the current time instead.
  # NOTE(review): assumes this metric comes from cAdvisor. If the exporter drops
  # the series shortly after a container disappears, the `for: 1h` window may
  # reset before firing -- confirm against the deployed exporter.
  expr: time() - container_last_seen > 60
  for: 1h
  labels:
    severity: critical
  annotations:
    summary: "Container failure on {{ $labels.instance }}"
    description: "No data received from a container for the last hour on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
- alert: ContainerRestartRate
  # Fires when containers restart faster than 0.2 times per minute (roughly one
  # restart every five minutes), sustained for 10 minutes.
  # rate() yields a per-second average, so it is scaled by 60 to express the
  # threshold in restarts per minute, as the description states. Unscaled, the
  # threshold meant 0.2 restarts per *second* (12 per minute).
  # NOTE(review): the origin of container_restart_total is not visible here;
  # verify that the exporter in use actually publishes this counter.
  expr: rate(container_restart_total[5m]) * 60 > 0.2
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: "High container restart rate on {{ $labels.instance }}"
    description: "Container restart rate is above 0.2 restarts per minute on {{ $labels.instance }}"
    source: 'https://monitor.pukeko.xyz'
- alert: ContainerHighPacketLoss
  # Fires when a container's network receive-error rate exceeds 0.1 errors per
  # minute, sustained for 10 minutes.
  # rate() yields a per-second average, so it is scaled by 60 to express the
  # threshold in errors per minute, as the description states.
  # NOTE(review): this counter tracks receive errors rather than dropped
  # packets; if true packet loss is the goal, a dropped-packets counter may be
  # the better signal -- confirm intent.
  expr: rate(container_network_receive_errors_total[5m]) * 60 > 0.1
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: "High packet loss on container network ({{ $labels.name }})"
    description: "Packet loss rate is above 0.1 errors per minute on {{ $labels.name }}"
    source: 'https://monitor.pukeko.xyz'
- name: Backups - name: Backups
rules: rules: