diff --git a/README.md b/README.md index 0fbd25e..a8cd460 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,46 @@ You must specify, at least, the following parameters via command options or envi --slack-channel value Slack channel id [$SLACK_CHANNEL] ``` +## Ignoring alerts + +There are three flags that allow you to suppress an event; all of them can be used simultaneously: +* `--ignore-events`: Ignore all notifications of the specified event types. +* `--ignore-resources`: Ignore all notifications related to the specified resource; note that the notification will only be suppressed +if all of its resources are ignored. +* `--ignore-resource-event`: Ignore only the specified event type of that specific resource, format `<event-type>:<resource-identifier>` + +All options allow multiple resources/events to be specified by using comma-separated values: +``` +--ignore-events "AWS_ELASTICACHE_BEFORE_UPDATE_DUE_NOTIFICATION,AWS_VPN_SINGLE_TUNNEL_NOTIFICATION" +--ignore-resources "elasticache-0,elasticache-1" +--ignore-resource-event "AWS_ELASTICACHE_BEFORE_UPDATE_DUE_NOTIFICATION:elasticache-0,AWS_VPN_SINGLE_TUNNEL_NOTIFICATION:vpn-01234567890abcdef" +``` + +Unfortunately (AFAIK) there is no documentation for all of the event types and resource identifiers (sometimes this is the ARN but +other times it is the resource name), so I suggest extracting them from the Slack message. 
+ +Elasticache update example: +``` +Event ARN: arn:aws:health:us-east-1::event/ELASTICACHE/AWS_ELASTICACHE_BEFORE_UPDATE_DUE_NOTIFICATION/AWS_ELASTICACHE_BEFORE_UPDATE_DUE_NOTIFICATION-us-east-1-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee + ^ + event type + +Resource(s): elasticache-0, elasticache-1 + ^ + resource identifier +``` + +VPN single tunnel example: +``` +Event ARN: arn:aws:health:us-east-1::event/VPN/AWS_VPN_SINGLE_TUNNEL_NOTIFICATION/AWS_VPN_SINGLE_TUNNEL_NOTIFICATION-aaaaaaaaaaaa-us-east-1-2023-M04 + ^ + event type + +Resource(s): vpn-01234567890abcdef + ^ + resource identifier +``` + ## Helm chart A helm chart is available [here][chart] diff --git a/exporter/health.go b/exporter/health.go index a0d87ec..5d0c6d3 100644 --- a/exporter/health.go +++ b/exporter/health.go @@ -20,51 +20,77 @@ func (m *Metrics) HealthOrganizationEnabled(ctx context.Context) bool { return false } -func (m Metrics) SendSlackNotification(events []HealthEvent) { - for _, e := range events { +func (m *Metrics) GetHealthEvents() []HealthEvent { + var events []HealthEvent - resources := m.extractResources(e.AffectedResources) - accounts := m.extractAccounts(e.AffectedAccounts) - - service := *e.Event.Service - region := *e.Event.Region - status := e.Event.StatusCode - - var text, color string - attachmentFields := []slack.AttachmentField{ - {Title: "Account(s)", Value: accounts, Short: true}, - {Title: "Resource(s)", Value: resources, Short: true}, - {Title: "Service", Value: service, Short: true}, - {Title: "Region", Value: region, Short: true}, - {Title: "Start Time", Value: e.Event.StartTime.In(m.tz).String(), Short: true}, - {Title: "Status", Value: string(status), Short: true}, - {Title: "Event ARN", Value: fmt.Sprintf("`%s`", *e.Event.Arn), Short: false}, - {Title: "Updates", Value: *e.EventDescription.LatestDescription, Short: false}, - } + if m.organizationEnabled { + events = m.GetOrgEvents() + } else { + events = m.GetAccountEvents() + } - if status == 
healthTypes.EventStatusCodeClosed { - text = fmt.Sprintf(":heavy_check_mark:*[RESOLVED] The AWS Health issue with the %s service in the %s region is now resolved.*", service, region) - color = "18be52" - attachmentFields = append(attachmentFields[:6], attachmentFields[5:]...) - attachmentFields[5] = slack.AttachmentField{Title: "End Time", Value: e.Event.EndTime.In(m.tz).String(), Short: true} - } else { - text = fmt.Sprintf(":rotating_light:*[NEW] AWS Health reported an issue with the %s service in the %s region.*", service, region) - color = "danger" + for _, e := range events { + if ignoreEvents(m.ignoreEvents, *e.Event.EventTypeCode) { + continue } - attachment := slack.Attachment{ - Color: color, - Fields: attachmentFields, + if ignoreResources(m.ignoreResources, e.AffectedResources) { + // only ignore this event if all resources are ignored + continue } - _, _, err := m.slackApi.PostMessage( - m.slackChannel, - slack.MsgOptionText(text, false), - slack.MsgOptionAttachments(attachment), - ) - if err != nil { - panic(err.Error()) + if ignoreResourceEvent(m.ignoreResourceEvent, e) { + continue } + + m.SendSlackNotification(e) + } + + return events +} + +func (m Metrics) SendSlackNotification(e HealthEvent) { + resources := m.extractResources(e.AffectedResources) + accounts := m.extractAccounts(e.AffectedAccounts) + + service := *e.Event.Service + region := *e.Event.Region + status := e.Event.StatusCode + + var text, color string + attachmentFields := []slack.AttachmentField{ + {Title: "Account(s)", Value: accounts, Short: true}, + {Title: "Resource(s)", Value: resources, Short: true}, + {Title: "Service", Value: service, Short: true}, + {Title: "Region", Value: region, Short: true}, + {Title: "Start Time", Value: e.Event.StartTime.In(m.tz).String(), Short: true}, + {Title: "Status", Value: string(status), Short: true}, + {Title: "Event ARN", Value: fmt.Sprintf("`%s`", *e.Event.Arn), Short: false}, + {Title: "Updates", Value: 
*e.EventDescription.LatestDescription, Short: false}, + } + + if status == healthTypes.EventStatusCodeClosed { + text = fmt.Sprintf(":heavy_check_mark:*[RESOLVED] The AWS Health issue with the %s service in the %s region is now resolved.*", service, region) + color = "18be52" + attachmentFields = append(attachmentFields[:6], attachmentFields[5:]...) + attachmentFields[5] = slack.AttachmentField{Title: "End Time", Value: e.Event.EndTime.In(m.tz).String(), Short: true} + } else { + text = fmt.Sprintf(":rotating_light:*[NEW] AWS Health reported an issue with the %s service in the %s region.*", service, region) + color = "danger" + } + + attachment := slack.Attachment{ + Color: color, + Fields: attachmentFields, + } + + _, _, err := m.slackApi.PostMessage( + m.slackChannel, + slack.MsgOptionText(text, false), + slack.MsgOptionAttachments(attachment), + ) + if err != nil { + panic(err.Error()) } } @@ -93,3 +119,56 @@ func (m Metrics) extractAccounts(accounts []string) string { return "All accounts in region" } } + +func ignoreEvents(ignoredEvents []string, event string) bool { + for _, e := range ignoredEvents { + if e == event { + return true + } + } + + return false +} + +func ignoreResources(ignoredResources []string, resources []healthTypes.AffectedEntity) bool { + size := len(resources) + + for _, ignored := range ignoredResources { + for _, resource := range resources { + if *resource.EntityValue == ignored { + size -= 1 + } + } + } + + if size == 0 { + // all resources are ignored, ignoring entire alert + return true + } + + // not all resources are ignored + return false +} + +func ignoreResourceEvent(ignoredResourceEvent []string, event HealthEvent) bool { + size := len(event.AffectedResources) + + for _, ignored := range ignoredResourceEvent { + tmp := strings.Split(ignored, ":") + ignoredEvent, ignoredResource := tmp[0], tmp[1] + + for _, resource := range event.AffectedResources { + if *resource.EntityValue == ignoredResource && *event.Event.EventTypeCode == 
ignoredEvent { + size -= 1 + } + } + } + + if size == 0 { + // all resources are ignored, ignoring entire alert + return true + } + + // not all resources are ignored + return false +} diff --git a/exporter/metrics.go b/exporter/metrics.go index c854000..155198a 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -3,6 +3,7 @@ package exporter import ( "context" "os" + "sort" "strings" "time" _ "time/tzdata" @@ -65,7 +66,24 @@ func (m *Metrics) init(ctx context.Context, c *cli.Context) { if c.String("regions") != "all-regions" { m.regions = strings.Split(c.String("regions"), ",") + sort.Strings(m.regions) } + + if len(c.String("ignore-events")) > 0 { + m.ignoreEvents = strings.Split(c.String("ignore-events"), ",") + sort.Strings(m.ignoreEvents) + } + + if len(c.String("ignore-resources")) > 0 { + m.ignoreResources = strings.Split(c.String("ignore-resources"), ",") + sort.Strings(m.ignoreResources) + } + + if len(c.String("ignore-resource-event")) > 0 { + m.ignoreResourceEvent = strings.Split(c.String("ignore-resource-event"), ",") + sort.Strings(m.ignoreResourceEvent) + } + } func (m *Metrics) Describe(ch chan<- *prometheus.Desc) { @@ -73,18 +91,7 @@ func (m *Metrics) Describe(ch chan<- *prometheus.Desc) { } func (m *Metrics) Collect(ch chan<- prometheus.Metric) { - var events []HealthEvent - if m.organizationEnabled { - events = m.GetOrgEvents() - } else { - events = m.GetEvents() - } - - if len(events) == 0 { - return - } - - m.SendSlackNotification(events) + m.GetHealthEvents() } func sanitizeLabel(label string) string { diff --git a/exporter/single.go b/exporter/single.go index b91f7d4..5165996 100644 --- a/exporter/single.go +++ b/exporter/single.go @@ -8,7 +8,7 @@ import ( healthTypes "github.com/aws/aws-sdk-go-v2/service/health/types" ) -func (m *Metrics) GetEvents() []HealthEvent { +func (m *Metrics) GetAccountEvents() []HealthEvent { ctx := context.TODO() now := time.Now() pag := health.NewDescribeEventsPaginator( diff --git a/exporter/types.go 
b/exporter/types.go index 9e54c8c..b32cba3 100644 --- a/exporter/types.go +++ b/exporter/types.go @@ -22,6 +22,10 @@ type Metrics struct { awsconfig aws.Config organizationEnabled bool regions []string + + ignoreEvents []string + ignoreResources []string + ignoreResourceEvent []string } type HealthEvent struct { diff --git a/main.go b/main.go index 3acdc2b..cf6eaf0 100644 --- a/main.go +++ b/main.go @@ -22,6 +22,9 @@ func main() { &cli.StringFlag{Name: "slack-token", Usage: "Slack token", EnvVars: []string{"SLACK_TOKEN"}, Required: true}, &cli.StringFlag{Name: "slack-channel", Usage: "Slack channel id", EnvVars: []string{"SLACK_CHANNEL"}, Required: true}, &cli.StringFlag{Name: "assume-role", Usage: "Assume another AWS IAM role", EnvVars: []string{"ASSUME_ROLE"}}, + &cli.StringFlag{Name: "ignore-events", Usage: "Comma separated list of events to be ignored on all resources"}, + &cli.StringFlag{Name: "ignore-resources", Usage: "Comma separated list of resources to be ignored on all events, format is dependant on resource type (some are ARN others are Name, check AWS docs)"}, + &cli.StringFlag{Name: "ignore-resource-event", Usage: "Comma separated list of events to be ignored on a specific resource (format: :)"}, } app := &cli.App{