Alerts

task_high_memory_usage_1g (1 active)
alert: task_high_memory_usage_1g
expr: sum
  by(container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)
  (container_memory_rss{container_label_com_docker_swarm_task_name=~".+"})
  > 1e+09
for: 1m
annotations:
  description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{
    $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize
    $value}}.'
  summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name
    }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}'
Labels State Active Since Value
alertname="task_high_memory_usage_1g" container_label_com_docker_swarm_node_id="bcm78wvcw2woi8gyoe2yz6ri8" container_label_com_docker_swarm_task_name="prod_backend.1.t3n9xx61m0yhqiyfuw6jubrlo" firing 2026-04-20 11:47:18.069511962 +0000 UTC 1.010016256e+09
task_high_cpu_usage_50 (1 active)
alert: task_high_cpu_usage_50
expr: sum
  by(container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)
  (rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m]))
  * 100 > 50
for: 1m
annotations:
  description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{
    $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize
    $value}}%.'
  summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name
    }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}'
Labels State Active Since Value
alertname="task_high_cpu_usage_50" container_label_com_docker_swarm_node_id="bcm78wvcw2woi8gyoe2yz6ri8" container_label_com_docker_swarm_task_name="swarmprom_cadvisor.bcm78wvcw2woi8gyoe2yz6ri8.ge3vifaw0gv378mt8wd25w2b5" pending 2026-04-20 14:35:18.069511962 +0000 UTC 72.88270801161066
node_cpu_usage (0 active)
alert: node_cpu_usage
expr: 100
  - (avg by(node_name) (irate(node_cpu_seconds_total{mode="idle"}[1m]) * on(instance)
  group_left(node_name) node_meta * 100)) > 50
for: 1m
labels:
  severity: warning
annotations:
  description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize $value}}%.
  summary: CPU alert for Swarm node '{{ $labels.node_name }}'
node_disk_fill_rate_6h (0 active)
alert: node_disk_fill_rate_6h
expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h],
  6 * 3600) * on(instance) group_left(node_name) node_meta < 0
for: 1h
labels:
  severity: critical
annotations:
  description: Swarm node {{ $labels.node_name }} disk is going to fill up in 6h.
  summary: Disk fill alert for Swarm node '{{ $labels.node_name }}'
node_disk_usage (0 active)
alert: node_disk_usage
expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"}
  - node_filesystem_free_bytes{mountpoint="/rootfs"}) * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"})
  * on(instance) group_left(node_name) node_meta > 85
for: 1m
labels:
  severity: warning
annotations:
  description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize $value}}%.
  summary: Disk alert for Swarm node '{{ $labels.node_name }}'
node_memory_usage (0 active)
alert: node_memory_usage
expr: sum
  by(node_name) (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) /
  node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta * 100)
  > 80
for: 1m
labels:
  severity: warning
annotations:
  description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize $value}}%.
  summary: Memory alert for Swarm node '{{ $labels.node_name }}'