Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/bosun: support actions by incident id #1696

Merged
merged 1 commit into from
Apr 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/bosun/sched/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.E
if si := silenced(ak); si != nil && event.Status == models.StNormal {
go func(ak models.AlertKey) {
slog.Infof("auto close %s because was silenced", ak)
err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak)
err := s.ActionByAlertKey("bosun", "Auto close because was silenced.", models.ActionClose, ak)
if err != nil {
slog.Errorln(err)
}
Expand Down
4 changes: 2 additions & 2 deletions cmd/bosun/sched/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func TestCheckFlapping(t *testing.T) {
r.Events[ak].Status = models.StNormal
s.RunHistory(r)
// Close the alert, so it should notify next time.
if err := s.Action("", "", models.ActionClose, ak); err != nil {
if err := s.ActionByAlertKey("", "", models.ActionClose, ak); err != nil {
t.Fatal(err)
}
r.Events[ak].Status = models.StWarning
Expand Down Expand Up @@ -179,7 +179,7 @@ func TestIncidentIds(t *testing.T) {

r.Events[ak].Status = models.StNormal
s.RunHistory(r)
err = s.Action("", "", models.ActionClose, ak)
err = s.ActionByAlertKey("", "", models.ActionClose, ak)
if err != nil {
t.Fatal(err)
}
Expand Down
48 changes: 32 additions & 16 deletions cmd/bosun/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ func (s *Schedule) MarshalGroups(T miniprofiler.Timer, filter string) (*StateGro
a := s.Conf.Alerts[k.Name()]
if a == nil {
slog.Errorf("unknown alert %s. Force closing.", k.Name())
if err2 = s.Action("bosun", "closing because alert doesn't exist.", models.ActionForceClose, k); err2 != nil {
if err2 = s.ActionByAlertKey("bosun", "closing because alert doesn't exist.", models.ActionForceClose, k); err2 != nil {
slog.Error(err2)
}
continue
Expand Down Expand Up @@ -574,52 +574,68 @@ func init() {
"The running count of actions performed by individual users (Closed alert, Acknowledged alert, etc).")
}

func (s *Schedule) Action(user, message string, t models.ActionType, ak models.AlertKey) error {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
// ActionByAlertKey performs action t on behalf of user, with the supplied
// message, against the incident that GetLatestIncident returns for ak.
//
// It returns the lookup error when the incident cannot be fetched, a
// "no such alert key" error when no incident is found, and otherwise the
// error (if any) produced while applying the action.
func (s *Schedule) ActionByAlertKey(user, message string, t models.ActionType, ak models.AlertKey) error {
	incident, lookupErr := s.DataAccess.State().GetLatestIncident(ak)
	switch {
	case lookupErr != nil:
		return lookupErr
	case incident == nil:
		return fmt.Errorf("no such alert key: %v", ak)
	}
	// The alert key handed back by action is already known to the caller, so
	// only the error is propagated.
	if _, actErr := s.action(user, message, t, incident); actErr != nil {
		return actErr
	}
	return nil
}

// ActionByIncidentId performs action t on behalf of user, with the supplied
// message, against the incident state that GetIncidentState returns for id.
//
// On success it returns the alert key reported by the action. If the lookup
// fails, or no incident exists for id, it returns an empty alert key together
// with the corresponding error.
func (s *Schedule) ActionByIncidentId(user, message string, t models.ActionType, id int64) (models.AlertKey, error) {
	incident, lookupErr := s.DataAccess.State().GetIncidentState(id)
	switch {
	case lookupErr != nil:
		return "", lookupErr
	case incident == nil:
		return "", fmt.Errorf("no incident with id: %v", id)
	}
	return s.action(user, message, t, incident)
}

func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (models.AlertKey, error) {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
isUnknown := st.LastAbnormalStatus == models.StUnknown
timestamp := utcNow()
switch t {
case models.ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
return "", fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
return "", fmt.Errorf("cannot acknowledge closed alert")
}
st.NeedAck = false
if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
return err
if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
return "", err
}
case models.ActionClose:
if st.IsActive() {
return fmt.Errorf("cannot close active alert")
return "", fmt.Errorf("cannot close active alert")
}
fallthrough
case models.ActionForceClose:
st.Open = false
st.End = &timestamp
case models.ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
return "", fmt.Errorf("can only forget unknowns")
}
fallthrough
case models.ActionPurge:
return s.DataAccess.State().Forget(ak)
return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
default:
return fmt.Errorf("unknown action type: %v", t)
return "", fmt.Errorf("unknown action type: %v", t)
}
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
st.Actions = append(st.Actions, models.Action{
Expand All @@ -628,8 +644,8 @@ func (s *Schedule) Action(user, message string, t models.ActionType, ak models.A
Type: t,
User: user,
})
_, err = s.DataAccess.State().UpdateIncidentState(st)
return err
_, err := s.DataAccess.State().UpdateIncidentState(st)
return st.AlertKey, err
}

type IncidentStatus struct {
Expand Down
11 changes: 10 additions & 1 deletion cmd/bosun/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ func Action(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (inter
User string
Message string
Keys []string
Ids []int64
Notify bool
}
j := json.NewDecoder(r.Body)
Expand Down Expand Up @@ -588,13 +589,21 @@ func Action(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (inter
if err != nil {
return nil, err
}
err = schedule.Action(data.User, data.Message, at, ak)
err = schedule.ActionByAlertKey(data.User, data.Message, at, ak)
if err != nil {
errs[key] = err
} else {
successful = append(successful, ak)
}
}
for _, id := range data.Ids {
ak, err := schedule.ActionByIncidentId(data.User, data.Message, at, id)
if err != nil {
errs[fmt.Sprintf("%v", id)] = err
} else {
successful = append(successful, ak)
}
}
if len(errs) != 0 {
return nil, errs
}
Expand Down