diff --git a/alertmanager/appconf/example.alertmanager.yml b/alertmanager/appconf/example.alertmanager.yml new file mode 100644 index 0000000..2a0267c --- /dev/null +++ b/alertmanager/appconf/example.alertmanager.yml @@ -0,0 +1,122 @@ +global: + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost: 'localhost:25' + smtp_from: 'alertmanager@example.org' + smtp_auth_username: 'alertmanager' + smtp_auth_password: 'password' + +# The directory from which notification templates are read. +templates: + - '/etc/alertmanager/template/*.tmpl' + +# The root route on which each incoming alert enters. +route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + # + # To aggregate by all possible labels use '...' as the sole label name. + # This effectively disables aggregation entirely, passing through all + # alerts as-is. This is unlikely to be what you want, unless you have + # a very low alert volume or your upstream notification system performs + # its own grouping. Example: group_by: [...] + group_by: ['alertname', 'cluster', 'service'] + + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This way ensures that you get multiple alerts for the same group that start + # firing shortly after another are batched together on the first + # notification. + group_wait: 30s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. + repeat_interval: 3h + + # A default receiver + receiver: team-X-mails + + # All the above attributes are inherited by all child routes and can + # overwritten on each. + + # The child route trees. + routes: + # This routes performs a regular expression match on alert labels to + # catch alerts that are related to a list of services. + - matchers: + - service=~"foo1|foo2|baz" + receiver: team-X-mails + # The service has a sub-route for critical alerts, any alerts + # that do not match, i.e. severity != critical, fall-back to the + # parent node and are sent to 'team-X-mails' + routes: + - matchers: + - severity="critical" + receiver: team-X-pager + - matchers: + - service="files" + receiver: team-Y-mails + + routes: + - matchers: + - severity="critical" + receiver: team-Y-pager + + # This route handles all alerts coming from a database service. If there's + # no team to handle it, it defaults to the DB team. + - matchers: + - service="database" + receiver: team-DB-pager + # Also group alerts by affected database. + group_by: [alertname, cluster, database] + routes: + - matchers: + - owner="team-X" + receiver: team-X-pager + continue: true + - matchers: + - owner="team-Y" + receiver: team-Y-pager + + +# Inhibition rules allow to mute a set of alerts given that another alert is +# firing. +# We use this to mute any warning-level notifications if the same alert is +# already critical. +inhibit_rules: + - source_matchers: [severity="critical"] + target_matchers: [severity="warning"] + # Apply inhibition if the alertname is the same. + # CAUTION: + # If all label names listed in `equal` are missing + # from both the source and target alerts, + # the inhibition rule will apply! + equal: [alertname, cluster, service] + + +receivers: + - name: 'team-X-mails' + email_configs: + - to: 'team-X+alerts@example.org' + + - name: 'team-X-pager' + email_configs: + - to: 'team-X+alerts-critical@example.org' + pagerduty_configs: + - service_key: + + - name: 'team-Y-mails' + email_configs: + - to: 'team-Y+alerts@example.org' + + - name: 'team-Y-pager' + pagerduty_configs: + - service_key: + + - name: 'team-DB-pager' + pagerduty_configs: + - service_key: diff --git a/alertmanager/compose.yaml b/alertmanager/compose.yaml new file mode 100644 index 0000000..4c8d123 --- /dev/null +++ b/alertmanager/compose.yaml @@ -0,0 +1,16 @@ +services: + alertmanager: + container_name: alertmanager + image: quay.io/prometheus/alertmanager + restart: unless-stopped + volumes: + - ./appconf/alertmanager.yml:/etc/alertmanager/alertmanager.yml + networks: + - monitoring + labels: + # Diun + diun.enable: true + +networks: + monitoring: + external: true \ No newline at end of file