diff --git a/main.go b/main.go index aa99def8..3dde64ba 100644 --- a/main.go +++ b/main.go @@ -13,30 +13,39 @@ import ( "k8s.io/client-go/tools/clientcmd" "github.com/linki/chaoskube/chaoskube" + "github.com/linki/chaoskube/util" ) var ( - labelString string - annString string - nsString string - master string - kubeconfig string - interval time.Duration - inCluster bool - dryRun bool - debug bool - version string + annString string + debug bool + dryRun bool + excludeWeekends bool + inCluster bool + interval time.Duration + kubeconfig string + labelString string + master string + nsString string + percentage float64 + runFrom string + runUntil string + version string ) func init() { - kingpin.Flag("labels", "A set of labels to restrict the list of affected pods. Defaults to everything.").StringVar(&labelString) kingpin.Flag("annotations", "A set of annotations to restrict the list of affected pods. Defaults to everything.").StringVar(&annString) - kingpin.Flag("namespaces", "A set of namespaces to restrict the list of affected pods. Defaults to everything.").StringVar(&nsString) - kingpin.Flag("master", "The address of the Kubernetes cluster to target").StringVar(&master) - kingpin.Flag("kubeconfig", "Path to a kubeconfig file").StringVar(&kubeconfig) - kingpin.Flag("interval", "Interval between Pod terminations").Default("10m").DurationVar(&interval) - kingpin.Flag("dry-run", "If true, don't actually do anything.").Default("true").BoolVar(&dryRun) kingpin.Flag("debug", "Enable debug logging.").BoolVar(&debug) + kingpin.Flag("dry-run", "If true, don't actually do anything.").Default("true").BoolVar(&dryRun) + kingpin.Flag("excludeWeekends", "Do not run on weekends").BoolVar(&excludeWeekends) + kingpin.Flag("interval", "Interval between Pod terminations").Default("1m").DurationVar(&interval) + kingpin.Flag("kubeconfig", "Path to a kubeconfig file").StringVar(&kubeconfig) + kingpin.Flag("labels", "A set of labels to restrict the list of affected pods. Defaults to everything.").StringVar(&labelString) + kingpin.Flag("master", "The address of the Kubernetes cluster to target").StringVar(&master) + kingpin.Flag("namespaces", "A set of namespaces to restrict the list of affected pods. Defaults to everything.").StringVar(&nsString) + kingpin.Flag("percentage", "How likely should a pod be killed every single run").Default("0.0").Float64Var(&percentage) + kingpin.Flag("run-from", "Start chaoskube daily at hours:minutes, e.g. 9:00").Default("0:00").StringVar(&runFrom) + kingpin.Flag("run-until", "Stop chaoskube daily at hours:minutes, e.g. 17:00").Default("0:00").StringVar(&runUntil) } func main() { @@ -93,13 +102,22 @@ func main() { time.Now().UTC().UnixNano(), ) + ticker := time.NewTicker(interval) for { - if err := chaoskube.TerminateVictim(); err != nil { - log.Fatal(err) + select { + case <-ticker.C: + if util.ShouldRunNow(excludeWeekends, runFrom, runUntil) { + candidates, err := chaoskube.Candidates() + if err != nil { + log.Fatal(err) + } + for _, candidate := range candidates { + if util.PodShouldDie(candidate, interval, percentage) { + chaoskube.DeletePod(candidate) + } + } + } } - - log.Debugf("Sleeping for %s...", interval) - time.Sleep(interval) } } diff --git a/util/util.go b/util/util.go index 48d7e3e6..7c2d74a8 100644 --- a/util/util.go +++ b/util/util.go @@ -1,22 +1,136 @@ package util import ( + "math/rand" + "strconv" + "strings" + "time" + + log "github.com/sirupsen/logrus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/pkg/api/v1" ) +var timeNow = timeNowFunc + +func init() { + rand.Seed(timeNow().Unix()) +} + +func timeNowFunc() time.Time { + return time.Now() +} + // NewPod returns a new pod instance for testing purposes. -func NewPod(namespace, name string) v1.Pod { +func NewPod(namespace, name string, schedule ...string) v1.Pod { + labels := map[string]string{"app": name} + if len(schedule) > 0 { + labels["chaos.schedule"] = schedule[0] + } return v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, Name: name, - Labels: map[string]string{ - "app": name, - }, + Labels: labels, Annotations: map[string]string{ "chaos": name, }, }, } } + +// takes a string containing a time (e.g. "23:42" and returns time object with that time today +func stringToTime(str string) (time.Time, error) { + now := timeNow() + year, month, day := now.Date() + time, err := time.Parse("15:04", str) + if err != nil { + return now, err + } + return time.AddDate(year, int(month)-1, day-1), nil +} + +// takes two strings containing a time (e.g. "09:00" and "17:00") and returns 2 +// time objects so that the "runFrom" one is before the "runUntil" one unless +// it needs to be after because of situations like runfrom 17:00 to 05:00 +func startAndEndTime(runFrom string, runUntil string) (time.Time, time.Time, error) { + start, err := stringToTime(runFrom) + if err != nil { + return timeNow(), timeNow(), err + } + end, err := stringToTime(runUntil) + if err != nil { + return timeNow(), timeNow(), err + } + // start this day and end the next day and be after start which means end + // will have to be moved to the next day + if end.Before(start) && timeNow().After(start) { + return start, end.AddDate(0, 0, 1), nil + } + return start, end, nil +} + +// checks whether time.Now() is between runFrom and runUntil and whether it +// should run during the weekend +func ShouldRunNow(excludeWeekends bool, runFrom string, runUntil string) bool { + now := timeNow() + // Exclude weekends, sunday = day 0, saturday = day 6 + weekday := now.Weekday() + if excludeWeekends && (weekday == 0 || weekday == 6) { + return false + } + // no input was specified + if runFrom == runUntil && runFrom == "0:00" { + return true + } + start, end, err := startAndEndTime(runFrom, runUntil) + if err != nil { + log.Info("Converting times errored. No action will be taken.") + return false + } + if now.After(start) && now.Before(end) { + return true + } + return false +} + +func parseLabel(label string) (rate int, span int, err error) { + split := strings.Split(label, ".") + if len(split) != 2 { + return 0, 0, err + } + rate_str, span_str := split[0], split[1] + rate, err = strconv.Atoi(rate_str) + if err != nil { + return 0, 0, err + } + switch span_str { + case "hour": + span = 60 + case "day": + span = 60 * 24 + case "week": + span = 60 * 24 * 7 + } + return +} + +func getOdds(p v1.Pod, interval time.Duration, percentage float64) float64 { + labels := p.GetLabels() + if labels["chaos.schedule"] == "" { + return percentage + } + rate, span, err := parseLabel(labels["chaos.schedule"]) + if err != nil { + log.Errorf("Error: %v from parsing %v's chaos.schedule, which is %s", err, p.Name, labels["chaos.schedule"]) + return 0.0 + } + return (float64(rate) * interval.Minutes()) / float64(span) +} + +func PodShouldDie(p v1.Pod, interval time.Duration, percentage float64) bool { + odds := getOdds(p, interval, percentage) + random := rand.Float64() + return (random <= odds) +} diff --git a/util/util_test.go b/util/util_test.go new file mode 100644 index 00000000..81941d38 --- /dev/null +++ b/util/util_test.go @@ -0,0 +1,128 @@ +package util + +import ( + "testing" + "time" +) + +func TestStringToTime(t *testing.T) { + nine, err := stringToTime("9:00") + if err != nil { + t.Fatal("stringToTime errored") + } + if nine.Hour() != 9 { + t.Fatal("stringToTime failed to parse hour") + } + if nine.Minute() != 0 { + t.Fatal("stringToTime failed to parse minutes") + } + _, err = stringToTime("9:00:00") + if err == nil { + t.Fatal("stringToTime should have failed") + } +} + +func TestStartAndEndTime(t *testing.T) { + t1, t2, err := startAndEndTime("09:00", "17:00") + if err != nil { + t.Fatal("startAndEndTime errored") + } + if t1.Hour() != 9 { + t.Fatal("startAndEndTime didn't parse time correctly") + } + if t2.Hour() != 17 { + t.Fatal("startAndEndTime didn't parse time correctly") + } + if t1.After(t2) { + t.Fatal("startAndEndTime didn't return the right times") + } + y_now, m_now, d_now := time.Now().Date() + y_1, m_1, d_1 := t1.Date() + y_2, m_2, d_2 := t2.Date() + if y_now != y_1 || y_1 != y_2 { + t.Fatal("startAndEndTime years are wrong", y_now, y_1, y_2) + } + if m_now != m_1 || m_1 != m_2 { + t.Fatal("startAndEndTime months are wrong", m_now, m_1, m_2) + } + if d_now != d_1 || d_1 != d_2 { + t.Fatal("startAndEndTime days are wrong", d_now, d_1, d_2) + } +} + +func TestShouldRunNow(t *testing.T) { + y_now, m_now, d_now := time.Now().Date() + + // within the window it should run + timeNow = func() time.Time { return time.Date(y_now, m_now, d_now, 11, 30, 0, 0, time.UTC) } + if !ShouldRunNow(false, "9:00", "17:00") { + t.Fatal("ShouldRunNow for 11:30 returned false") + } + + // outside the window it should run + timeNow = func() time.Time { return time.Date(y_now, m_now, d_now, 19, 30, 0, 0, time.UTC) } + if ShouldRunNow(false, "9:00", "17:00") { + t.Fatal("ShouldRunNow for 19:30 returned true") + } + + // during a weekend, excludeWeekends = true, date is a this is a Sunday + timeNow = func() time.Time { return time.Date(2017, 12, 31, 11, 30, 0, 0, time.UTC) } + if ShouldRunNow(true, "9:00", "17:00") { + t.Fatal("ShouldRunNow for excludeWeekends, but within the time window returned false") + } + + // always run, but exclude the weekend + if ShouldRunNow(true, "0:00", "0:00") { + t.Fatal("ShouldRunNow for excludeWeekends returned true") + } + + // always run and include the weekend + if !ShouldRunNow(false, "0:00", "0:00") { + t.Fatal("ShouldRunNow for excludeWeekends returned false") + } +} + +func TestParseLabel(t *testing.T) { + labels := map[string]map[string]int{ + "1.hour": {"rate": 1, "span": 60}, + "2.day": {"rate": 2, "span": 1440}, + "3.week": {"rate": 3, "span": 10080}, + } + for k, v := range labels { + rate, span, err := parseLabel(k) + if err != nil { + t.Fatal("parseLabel errored") + } + if rate != v["rate"] { + t.Fatalf("parseLabel returned wrong rate want: %v got: %v", v["rate"], rate) + } + if span != v["span"] { + t.Fatalf("parseLabel returned wrong span want: %v got: %v", v["span"], span) + } + } +} + +func TestGetOdds(t *testing.T) { + schedules := map[string]float64{"1.hour": 1.0 / float64(60), "2.day": 2.0 / float64(60*24), "3.week": 3.0 / float64(60*24*7)} + percentage := 0.5 + for schedule, initial_odd := range schedules { + p := NewPod("default", "foo", schedule) + intervalls := []time.Duration{time.Minute * 1, time.Minute * 5, time.Minute * 10, time.Minute * 60} + for _, interval := range intervalls { + odd := getOdds(p, interval, 0.5) + target_odd := int(initial_odd * interval.Minutes() * 100) + conv_odd := int(odd * 100) + if conv_odd != target_odd { + t.Fatalf("getOdds returned wrong odd want: %v got: %v, schedule: %v, interval: %v, percentage: %v", + target_odd, odd, schedule, interval, percentage) + } + } + } + p := NewPod("default", "foo") + interval := 10 * time.Minute + odd := getOdds(p, interval, 0.5) + if odd != 0.5 { + t.Fatalf("getOdds returned wrong odd want: %v got: %v, schedule: %v, interval: %v, percentage: %v", + percentage, odd, "", interval, percentage) + } +}