diff --git a/cmd/bosun/sched/check.go b/cmd/bosun/sched/check.go index 0b856c0374..170e19f490 100644 --- a/cmd/bosun/sched/check.go +++ b/cmd/bosun/sched/check.go @@ -240,7 +240,7 @@ func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.E if si := silenced(ak); si != nil && event.Status == models.StNormal { go func(ak models.AlertKey) { slog.Infof("auto close %s because was silenced", ak) - err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak) + err := s.ActionByAlertKey("bosun", "Auto close because was silenced.", models.ActionClose, ak) if err != nil { slog.Errorln(err) } diff --git a/cmd/bosun/sched/check_test.go b/cmd/bosun/sched/check_test.go index 9ecf399e18..230b88e4e6 100644 --- a/cmd/bosun/sched/check_test.go +++ b/cmd/bosun/sched/check_test.go @@ -84,7 +84,7 @@ func TestCheckFlapping(t *testing.T) { r.Events[ak].Status = models.StNormal s.RunHistory(r) // Close the alert, so it should notify next time. - if err := s.Action("", "", models.ActionClose, ak); err != nil { + if err := s.ActionByAlertKey("", "", models.ActionClose, ak); err != nil { t.Fatal(err) } r.Events[ak].Status = models.StWarning @@ -179,7 +179,7 @@ func TestIncidentIds(t *testing.T) { r.Events[ak].Status = models.StNormal s.RunHistory(r) - err = s.Action("", "", models.ActionClose, ak) + err = s.ActionByAlertKey("", "", models.ActionClose, ak) if err != nil { t.Fatal(err) } diff --git a/cmd/bosun/sched/sched.go b/cmd/bosun/sched/sched.go index cbd83948e6..495bdac29d 100644 --- a/cmd/bosun/sched/sched.go +++ b/cmd/bosun/sched/sched.go @@ -386,7 +386,7 @@ func (s *Schedule) MarshalGroups(T miniprofiler.Timer, filter string) (*StateGro a := s.Conf.Alerts[k.Name()] if a == nil { slog.Errorf("unknown alert %s. Force closing.", k.Name()) - if err2 = s.Action("bosun", "closing because alert doesn't exist.", models.ActionForceClose, k); err2 != nil { + if err2 = s.ActionByAlertKey("bosun", "closing because alert doesn't exist.", models.ActionForceClose, k); err2 != nil { slog.Error(err2) } continue @@ -574,10 +574,7 @@ func init() { "The running count of actions performed by individual users (Closed alert, Acknowledged alert, etc).") } -func (s *Schedule) Action(user, message string, t models.ActionType, ak models.AlertKey) error { - if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil { - slog.Errorln(err) - } +func (s *Schedule) ActionByAlertKey(user, message string, t models.ActionType, ak models.AlertKey) error { st, err := s.DataAccess.State().GetLatestIncident(ak) if err != nil { return err @@ -585,23 +582,42 @@ func (s *Schedule) Action(user, message string, t models.ActionType, ak models.A if st == nil { return fmt.Errorf("no such alert key: %v", ak) } + _, err = s.action(user, message, t, st) + return err +} + +func (s *Schedule) ActionByIncidentId(user, message string, t models.ActionType, id int64) (models.AlertKey, error) { + st, err := s.DataAccess.State().GetIncidentState(id) + if err != nil { + return "", err + } + if st == nil { + return "", fmt.Errorf("no incident with id: %v", id) + } + return s.action(user, message, t, st) +} + +func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (models.AlertKey, error) { + if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil { + slog.Errorln(err) + } isUnknown := st.LastAbnormalStatus == models.StUnknown timestamp := utcNow() switch t { case models.ActionAcknowledge: if !st.NeedAck { - return fmt.Errorf("alert already acknowledged") + return "", fmt.Errorf("alert already acknowledged") } if !st.Open { - return fmt.Errorf("cannot acknowledge closed alert") + return "", fmt.Errorf("cannot acknowledge closed alert") } st.NeedAck = false - if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil { - return err + if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil { + return "", err } case models.ActionClose: if st.IsActive() { - return fmt.Errorf("cannot close active alert") + return "", fmt.Errorf("cannot close active alert") } fallthrough case models.ActionForceClose: @@ -609,17 +625,17 @@ func (s *Schedule) Action(user, message string, t models.ActionType, ak models.A st.End = ×tamp case models.ActionForget: if !isUnknown { - return fmt.Errorf("can only forget unknowns") + return "", fmt.Errorf("can only forget unknowns") } fallthrough case models.ActionPurge: - return s.DataAccess.State().Forget(ak) + return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey) default: - return fmt.Errorf("unknown action type: %v", t) + return "", fmt.Errorf("unknown action type: %v", t) } // Would like to also track the alert group, but I believe this is impossible because any character // that could be used as a delimiter could also be a valid tag key or tag value character - if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil { + if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil { slog.Errorln(err) } st.Actions = append(st.Actions, models.Action{ @@ -628,8 +644,8 @@ func (s *Schedule) Action(user, message string, t models.ActionType, ak models.A Type: t, User: user, }) - _, err = s.DataAccess.State().UpdateIncidentState(st) - return err + _, err := s.DataAccess.State().UpdateIncidentState(st) + return st.AlertKey, err } type IncidentStatus struct { diff --git a/cmd/bosun/web/web.go b/cmd/bosun/web/web.go index f0d3f344da..a21572b8e3 100644 --- a/cmd/bosun/web/web.go +++ b/cmd/bosun/web/web.go @@ -561,6 +561,7 @@ func Action(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (inter User string Message string Keys []string + Ids []int64 Notify bool } j := json.NewDecoder(r.Body) @@ -588,13 +589,21 @@ func Action(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (inter if err != nil { return nil, err } - err = schedule.Action(data.User, data.Message, at, ak) + err = schedule.ActionByAlertKey(data.User, data.Message, at, ak) if err != nil { errs[key] = err } else { successful = append(successful, ak) } } + for _, id := range data.Ids { + ak, err := schedule.ActionByIncidentId(data.User, data.Message, at, id) + if err != nil { + errs[fmt.Sprintf("%v", id)] = err + } else { + successful = append(successful, ak) + } + } if len(errs) != 0 { return nil, errs }