diff --git a/config/alerts.yml b/config/alerts.yml index 3b5a4da..8536139 100755 --- a/config/alerts.yml +++ b/config/alerts.yml @@ -97,16 +97,6 @@ groups: - name: Docker rules: - - - alert: ContainerHighCpuUtilization - expr: (sum(rate(container_cpu_usage_seconds_total{name!=""}[3m])) BY (instance, name) * 100) > 80 - for: 2m - labels: - severity: warning - annotations: - summary: Container High CPU utilization (instance {{ $labels.instance }}) - description: "Container CPU utilization is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - source: 'https://monitor.pukeko.xyz' - alert: ContainerHighMemoryUsage expr: (sum(container_memory_working_set_bytes{name!=""}) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) > 80 @@ -208,3 +198,65 @@ groups: description: "The timer '{{ $labels.instance }}' has a failed status" source: 'https://monitor.pukeko.xyz' + - alert: SystemBackupFailed + expr: | + system_backup == 1 + labels: + severity: critical + annotations: + summary: "System Backup has failed" + description: "The repository {{ $labels.instance }} has failed the backup process" + + - alert: SystemBackupUnlockFailed + expr: | + system_backup == -1 + labels: + severity: critical + annotations: + summary: "System Backup cannot proceed" + description: "The repository {{ $labels.instance }} has failed to unlock. Backup cannot be performed" + + - alert: SystemBackupIntegrityFailed + expr: | + system_backup == -2 + labels: + severity: critical + annotations: + summary: "System Backup contains errors" + description: "The repository {{ $labels.instance }} has failed an integrity check. Backup data may be corrupted" + + - alert: SystemBackupCleaningFailed + expr: | + system_backup == -3 + labels: + severity: critical + annotations: + summary: "System Backup cannot be cleaned" + description: "The repository {{ $labels.instance }} has failed the cleanup process. 
Backup may contain excess data" + + - alert: SystemBackupSucceeded + expr: | + system_backup == 0 + labels: + severity: info + annotations: + summary: "System Backup has succeeded" + description: "The repository {{ $labels.instance }} has successfully completed the backup process" + + - alert: SystemBackupIntegritySucceeded + expr: | + system_backup == 2 + labels: + severity: info + annotations: + summary: "System Backup has no errors" + description: "The repository {{ $labels.instance }} passed the integrity check" + + - alert: SystemBackupCleaningSucceeded + expr: | + system_backup == 3 + labels: + severity: info + annotations: + summary: "System Backup has been cleaned" + description: "The repository {{ $labels.instance }} has completed the cleanup process"