diff --git a/CHANGELOG.md b/CHANGELOG.md index 339c88c..ce787f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ Changelog ========= +## v0.3.0 + +[PR#6](https://github.com/ContainerSolutions/helm-monitor/pull/6): + +- add Sentry support +- adjust documentation and command line helpers + ## v0.2.0 [PR#5](https://github.com/ContainerSolutions/helm-monitor/pull/5): diff --git a/README.md b/README.md index fb7dd66..dede983 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Helm Monitor plugin =================== > Monitor a release, rollback to a previous version depending on the result of -a PromQL (Prometheus), Lucene or DSL query (ElasticSearch). +a PromQL (Prometheus), events (Sentry), Lucene or DSL query (ElasticSearch). ![Helm monitor failure](helm-monitor-failure.png) @@ -69,6 +69,23 @@ $ helm monitor elasticsearch --elasticsearch=http://elasticsearch:9200 \ 'status:500 AND kubernetes.labels.app:app AND version:2.0.0' ``` +### Sentry + +Monitor the **peeking-bunny** release against a Sentry server; a rollback is +initiated if the number of events is over 0 for the release 2.0.0: + +```bash +$ helm monitor sentry peeking-bunny \ + --api-key $SENTRY_API_KEY \ + --organization sentry \ + --project my-project \ + --sentry http://sentry:9000 \ + --tag release=2.0.0 \ + --regexp \ + --message 'Error with database connection.*' +``` + + ## Docker You can also use the Helm monitor backed Docker image to monitor: diff --git a/cmd/monitor.go b/cmd/monitor.go index a4aa30b..cf95129 100644 --- a/cmd/monitor.go +++ b/cmd/monitor.go @@ -15,6 +15,7 @@ import ( var ( settings helm_env.EnvSettings monitor *monitorCmd + verbose bool ) type monitorCmd struct { @@ -25,7 +26,6 @@ type monitorCmd struct { interval int64 rollbackTimeout int64 timeout int64 - verbose bool wait bool } @@ -56,7 +56,7 @@ func prettyError(err error) error { } func debug(format string, args ...interface{}) { - if monitor.verbose { + if verbose { format = fmt.Sprintf("[debug] %s\n", format) fmt.Printf(format, args...)
} @@ -77,7 +77,7 @@ func newMonitorCmd(out io.Writer) *cobra.Command { p.Int64Var(&monitor.expectedResultCount, "expected-result-count", 0, "number of results that are expected to be returned by the query (rollback triggered if the number of results exceeds this value)") p.BoolVar(&monitor.force, "force", false, "force resource update through delete/recreate if needed") p.BoolVar(&monitor.wait, "wait", false, "if set, will wait until all Pods, PVCs, Services, and minimum number of Pods of a Deployment are in a ready state before marking a rollback as successful. It will wait for as long as --rollback-timeout") - p.BoolVarP(&monitor.verbose, "verbose", "v", false, "enable verbose output") + p.BoolVarP(&verbose, "verbose", "v", false, "enable verbose output") p.Int64Var(&monitor.rollbackTimeout, "rollback-timeout", 300, "time in seconds to wait for any individual Kubernetes operation during the rollback (like Jobs for hooks)") p.Int64Var(&monitor.timeout, "timeout", 300, "time in seconds to wait before assuming a monitoring action is successfull") p.Int64VarP(&monitor.interval, "interval", "i", 10, "time in seconds between each query") @@ -85,6 +85,7 @@ func newMonitorCmd(out io.Writer) *cobra.Command { cmd.AddCommand( newMonitorPrometheusCmd(out), newMonitorElasticSearchCmd(out), + newMonitorSentryCmd(out), ) return cmd diff --git a/cmd/monitor_elasticsearch.go b/cmd/monitor_elasticsearch.go index 3457541..680934e 100644 --- a/cmd/monitor_elasticsearch.go +++ b/cmd/monitor_elasticsearch.go @@ -23,18 +23,18 @@ empty result. The query argument can be either the path of a query DSL json file or a Lucene query string. 
-Usage with Lucene query: +Example with Lucene query: - $ helm monitor elasticsearch frontend 'status:500 AND kubernetes.labels.app:app AND version:2.0.0' + $ helm monitor elasticsearch my-release 'status:500 AND kubernetes.labels.app:app AND version:2.0.0' -Usage with query DSL file: +Example with query DSL file: - $ helm monitor elasticsearch frontend ./examples/elasticsearch-query.json + $ helm monitor elasticsearch my-release ./examples/elasticsearch-query.json Reference: - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-count.html + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-count.html ` diff --git a/cmd/monitor_prometheus.go b/cmd/monitor_prometheus.go index ffeaba7..99c6062 100644 --- a/cmd/monitor_prometheus.go +++ b/cmd/monitor_prometheus.go @@ -20,14 +20,14 @@ This command monitor a release by querying Prometheus at a given interval and take care of rolling back to the previous version if the query return a non- empty result. -Usage: +Example: - $ helm monitor prometheus frontend 'rate(http_requests_total{code=~"^5.*$"}[5m]) > 0' + $ helm monitor prometheus my-release 'rate(http_requests_total{code=~"^5.*$"}[5m]) > 0' Reference: - https://prometheus.io/docs/prometheus/latest/querying/basics/ + https://prometheus.io/docs/prometheus/latest/querying/basics/ ` diff --git a/cmd/monitor_sentry.go b/cmd/monitor_sentry.go new file mode 100644 index 0000000..b3d18fb --- /dev/null +++ b/cmd/monitor_sentry.go @@ -0,0 +1,274 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "os/signal" + "regexp" + "strings" + "syscall" + "time" + + "github.com/spf13/cobra" + "k8s.io/helm/pkg/helm" +) + +const monitorSentryDesc = ` +This command monitor a release by querying Sentry at a given interval and +take care of rolling back to the previous version if the query return a non- +empty result. 
+ +Example: + + $ helm monitor sentry my-release \ + --api-key \ + --organization my-organization \ + --project my-project \ + --sentry https://sentry-endpoint/ \ + --tag environment=production \ + --tag release=2.0.0 \ + --message 'Error message' + +Example with event message matching regular expression: + + $ helm monitor sentry my-release \ + --api-key \ + --organization my-organization \ + --project my-project \ + --sentry https://sentry-endpoint/ \ + --message 'pointer.+' \ + --regexp + +` + +type monitorSentryCmd struct { + name string + out io.Writer + client helm.Interface + sentryAddr string + sentryAPIKey string + sentryOrganization string + sentryProject string + message string + regexp bool + tags []string +} + +type tag struct { + Key string `json:"key"` + Value string `json:"value"` +} + +type sentryEvent struct { + Message string `json:"message"` + Tags []*tag `json:"tags"` +} + +func newMonitorSentryCmd(out io.Writer) *cobra.Command { + m := &monitorSentryCmd{ + out: out, + } + + cmd := &cobra.Command{ + Use: "sentry [flags] RELEASE", + Short: "query a sentry server", + Long: monitorSentryDesc, + PreRunE: setupConnection, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("This command neeeds 1 argument: release name") + } + + m.name = args[0] + m.client = ensureHelmClient(m.client) + + return m.run() + }, + } + + f := cmd.Flags() + f.StringVar(&m.sentryAddr, "sentry", "http://localhost:9000", "sentry address") + f.StringVar(&m.sentryAPIKey, "api-key", "", "sentry api key") + f.StringVar(&m.sentryOrganization, "organization", "", "sentry organization") + f.StringVar(&m.sentryProject, "project", "", "sentry project") + f.StringVar(&m.message, "message", "", "event message to match") + f.BoolVar(&m.regexp, "regexp", false, "enable regular expression") + f.StringSliceVar(&m.tags, "tag", []string{}, "tags, ie: --tag release=2.0.0 --tag environment=production") + + cmd.MarkFlagRequired("api-key") + 
cmd.MarkFlagRequired("organization") + cmd.MarkFlagRequired("project") + + return cmd +} + +func convertStringToTags(s []string) (tagList []*tag) { + tagList = []*tag{} + for _, t := range s { + a := strings.Split(t, "=") + if len(a) != 2 { + debug("Provided tag is malformed, should match pattern key=value, got %s", t) + continue + } + tagList = append(tagList, &tag{ + Key: a[0], + Value: a[1], + }) + } + + return +} + +func matchEvents(eventList []*sentryEvent, message string, tagList []*tag, useRegexp bool) (output []*sentryEvent, err error) { + if message == "" && len(tagList) == 0 { + return eventList, nil + } + + var r *regexp.Regexp + if useRegexp { + r, err = regexp.Compile(message) + if err != nil { + return nil, err + } + } + + for _, event := range eventList { + match := false + if useRegexp && r.MatchString(event.Message) { + match = true + } else if message != "" && event.Message == message { + match = true + } + + if match && len(tagList) > 0 && !matchTags(tagList, event.Tags) { + continue + } + + if match { + output = append(output, event) + } + } + + return +} + +func matchTags(tagList []*tag, matchTagList []*tag) bool { + matchCount := 0 + for _, matchTag := range matchTagList { + for _, t := range tagList { + if matchTag.Key == t.Key && matchTag.Value == t.Value { + matchCount++ + } + } + } + if matchCount != len(tagList) { + return false + } + + return true +} + +func (m *monitorSentryCmd) run() error { + _, err := m.client.ReleaseContent(m.name) + if err != nil { + return prettyError(err) + } + + fmt.Fprintf(m.out, "Monitoring %s...\n", m.name) + + client := &http.Client{Timeout: 5 * time.Second} + + req, err := http.NewRequest("GET", m.sentryAddr+"/api/0/projects/"+m.sentryOrganization+"/"+m.sentryProject+"/events/", nil) + if err != nil { + return prettyError(err) + } + + req.Header.Add("Authorization", "Bearer "+m.sentryAPIKey) + + quit := make(chan os.Signal) + signal.Notify(quit, syscall.SIGTERM, syscall.SIGINT) + + ticker := 
time.NewTicker(time.Second * time.Duration(monitor.interval)) + + go func() { + time.Sleep(time.Second * time.Duration(monitor.timeout)) + fmt.Fprintf(m.out, "No results after %d second(s)\n", monitor.timeout) + close(quit) + }() + + for { + select { + case <-ticker.C: + debug("Processing URL %s", req.URL.String()) + + res, err := client.Do(req) + if err != nil { + return prettyError(err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + + if err != nil { + return prettyError(err) + } + + var response []*sentryEvent + err = json.Unmarshal(body, &response) + if err != nil { + return prettyError(err) + } + + debug("Response: %v", response) + debug("Result count: %d", len(response)) + + time.Sleep(30 * time.Second) + + events, err := matchEvents( + response, + m.message, + convertStringToTags(m.tags), + m.regexp, + ) + + if err != nil { + return prettyError(err) + } + + debug("Matched events: %d", len(events)) + + if len(events) > int(monitor.expectedResultCount) { + ticker.Stop() + + fmt.Fprintf(m.out, "Failure detected, rolling back...\n") + + _, err := m.client.RollbackRelease( + m.name, + helm.RollbackDryRun(monitor.dryRun), + helm.RollbackRecreate(false), + helm.RollbackForce(monitor.force), + helm.RollbackDisableHooks(monitor.disableHooks), + helm.RollbackVersion(0), + helm.RollbackTimeout(monitor.rollbackTimeout), + helm.RollbackWait(monitor.wait)) + + if err != nil { + return prettyError(err) + } + + fmt.Fprintf(m.out, "Successfully rolled back to previous revision!\n") + return nil + } + + case <-quit: + ticker.Stop() + debug("Quitting...") + return nil + } + } +} diff --git a/cmd/monitor_sentry_test.go b/cmd/monitor_sentry_test.go new file mode 100644 index 0000000..d0994ca --- /dev/null +++ b/cmd/monitor_sentry_test.go @@ -0,0 +1,218 @@ +package main + +import ( + "fmt" + "reflect" + "testing" + + "github.com/davecgh/go-spew/spew" +) + +func TestConvertStringToTags(t *testing.T) { + for _, test := range []struct { + name string + 
input []string + expected []*tag + }{ + { + name: "it should convert a list of string into tags", + input: []string{"key1=value1", "key2=value2"}, + expected: []*tag{ + &tag{Key: "key1", Value: "value1"}, + &tag{Key: "key2", Value: "value2"}, + }, + }, + { + name: "it should not convert wrongly formatted tags", + input: []string{"key1"}, + expected: []*tag{}, + }, + } { + t.Run(fmt.Sprintf("%s", test.name), func(t *testing.T) { + output := convertStringToTags(test.input) + if !reflect.DeepEqual(test.expected, output) { + t.Errorf( + "\ngiven %v\nexpected: %v\ngot: %v\n", + spew.Sdump(test.input), + spew.Sdump(test.expected), + spew.Sdump(output), + ) + } + }) + } +} + +type matchTagsInput struct { + tagList []*tag + matchTagList []*tag +} + +func TestMatchTags(t *testing.T) { + for _, test := range []struct { + name string + input matchTagsInput + expected bool + }{ + { + name: "it should return true if the tags matches", + input: matchTagsInput{ + tagList: []*tag{ + &tag{Key: "key2", Value: "value2"}, + }, + matchTagList: []*tag{ + &tag{Key: "key1", Value: "value1"}, + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + expected: true, + }, + { + name: "it should return false if not tags matches", + input: matchTagsInput{ + tagList: []*tag{ + &tag{Key: "key2", Value: "value2"}, + }, + matchTagList: []*tag{ + &tag{Key: "key1", Value: "value1"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + expected: false, + }, + } { + t.Run(fmt.Sprintf("%s", test.name), func(t *testing.T) { + output := matchTags(test.input.tagList, test.input.matchTagList) + if !reflect.DeepEqual(test.expected, output) { + t.Errorf( + "\ngiven %v\nexpected: %v\ngot: %v\n", + spew.Sdump(test.input), + spew.Sdump(test.expected), + spew.Sdump(output), + ) + } + }) + } +} + +type matchEventsInput struct { + eventList []*sentryEvent + message string + tagList []*tag + useRegexp bool +} + +func TestMatchEvents(t *testing.T) { + for _, test := range []struct { + name 
string + input matchEventsInput + expected []*sentryEvent + }{ + { + name: "it should match events by message and tags", + input: matchEventsInput{ + message: "This is an event", + tagList: []*tag{ + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + eventList: []*sentryEvent{ + &sentryEvent{ + Message: "This is an event", + Tags: []*tag{ + &tag{Key: "key1", Value: "value1"}, + }, + }, + &sentryEvent{ + Message: "This is an event", + Tags: []*tag{ + &tag{Key: "key1", Value: "value1"}, + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + &sentryEvent{ + Message: "", + Tags: []*tag{ + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + &sentryEvent{ + Message: "This is an event", + Tags: []*tag{ + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + }, + }, + expected: []*sentryEvent{ + &sentryEvent{ + Message: "This is an event", + Tags: []*tag{ + &tag{Key: "key1", Value: "value1"}, + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + &sentryEvent{ + Message: "This is an event", + Tags: []*tag{ + &tag{Key: "key2", Value: "value2"}, + &tag{Key: "key3", Value: "value3"}, + }, + }, + }, + }, + { + name: "it should match events by regular expression", + input: matchEventsInput{ + eventList: []*sentryEvent{ + &sentryEvent{Message: "This is a first event"}, + &sentryEvent{Message: "This is a second event"}, + &sentryEvent{Message: "This is a third event"}, + }, + message: "first|second", + useRegexp: true, + }, + expected: []*sentryEvent{ + &sentryEvent{Message: "This is a first event"}, + &sentryEvent{Message: "This is a second event"}, + }, + }, + { + name: "it should match all events if message and tag list is not provided", + input: matchEventsInput{ + eventList: []*sentryEvent{ + &sentryEvent{Message: "This is a first event"}, + &sentryEvent{Message: "This is a second event"}, + &sentryEvent{Message: 
"This is a third event"}, + }, + message: "", + useRegexp: false, + }, + expected: []*sentryEvent{ + &sentryEvent{Message: "This is a first event"}, + &sentryEvent{Message: "This is a second event"}, + &sentryEvent{Message: "This is a third event"}, + }, + }, + } { + t.Run(fmt.Sprintf("%s", test.name), func(t *testing.T) { + output, err := matchEvents( + test.input.eventList, + test.input.message, + test.input.tagList, + test.input.useRegexp, + ) + if !reflect.DeepEqual(test.expected, output) || err != nil { + t.Errorf("\ngiven: \n%v\nexpected: \n%v\ngot: \n%v", + spew.Sdump(test.input), + spew.Sdump(test.expected), + spew.Sdump(output), + ) + } + }) + } +} diff --git a/examples/Makefile b/examples/Makefile index e3fe93f..2d4f2cd 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -4,12 +4,10 @@ PROMETHEUS_VERSION=7.0.2 build: eval $$(minikube docker-env) && \ docker build \ - -v $${HOME}/go:/root/go \ --build-arg "LDFLAGS=-X main.version=1.0.0" \ -t my-app:1.0.0 \ app && \ docker build \ - -v $${HOME}/go:/root/go \ --build-arg "LDFLAGS=-X main.version=2.0.0" \ -t my-app:2.0.0 \ app diff --git a/examples/README.md b/examples/README.md index b5f1152..d3bffec 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,16 +1,17 @@ Helm monitor example ==================== -> In this example, we run the Prometheus operator and a GoLang application in -Minikube, upgrade then monitor for HTTP failure. If the amount of 5xx reach a -certain limit, then the application get automatically rolled back to its -previous state. - +> This example demonstrate how to use helm-monitor to rollback a Helm release +based on events. 
## Prepare -``` -# initialise Tiller +Prepare your environment (Minikube, Tiller and build the required images): + +```bash +# initialise Tiller with RBAC +$ kubectl create serviceaccount tiller -n kube-system +$ kubectl create clusterrolebinding tiller --clusterrole=cluster-admin --serviceaccount=kube-system:tiller $ helm init --wait # build the application for Minikube @@ -23,106 +24,8 @@ $ helm upgrade -i my-app --set image.tag=1.0.0 ./app/charts $ minikube service my-app ``` -## Prometheus - -### Setup - -Install Prometheus: - -``` -$ helm install \ - --version 7.0.2 \ - --set server.service.type=Loadbalancer \ - --set server.global.scrape_interval=30s \ - --set alertmanager.enabled=false \ - --set kubeStateMetrics.enabled=false \ - --set nodeExporter.enabled=false \ - --set pushgateway.enabled=false \ - --name prometheus \ - stable/prometheus -``` - -Access Prometheus: - -``` -$ minikube service prometheus -``` - -### Upgrade and monitor - -``` -# get Prometheus endpoint -$ prometheus=$(minikube service prometheus-server --url) - -# release version 2 -$ helm upgrade -i my-app --set image.tag=2.0.0 ./app/charts - -# monitor -$ helm monitor prometheus my-app --prometheus $prometheus 'rate(http_requests_total{code=~"^5.*$",version="2.0.0"}[5m]) > 0' -``` - -In a new terminal, simulate internal server failure: - -``` -$ app=$(minikube service my-app --url) -$ while sleep 1; do curl "$app"/internal-error; done -``` - -## ElasticSearch - -### Setup - -Minikube support the EFK stack via addons, to enable it: - -``` -$ minikube addons enable efk -``` - -If Minikube was already running, you might need to restart it in order to have -the EFK stack up and running: +## Follow the monitoring procedure from one of the examples below -``` -$ minikube stop -$ minikube start -``` - -Access Kibana: - -``` -$ minikube service kibana-logging -n kube-system -``` - -### Upgrade and monitor - -``` -$ kubectl port-forward -n kube-system $(kubectl get po -n kube-system -l
k8s-app=elasticsearch-logging -o jsonpath="{.items[0].metadata.name}") 9200 -$ helm upgrade -i my-app --set image.tag=2.0.0 ./app/charts -``` - -Monitor using via query DSL: - -``` -$ helm monitor elasticsearch my-app ./elasticsearch-query.json -``` - -Or via Lucene query - -``` -$ helm monitor elasticsearch my-app "status:500 AND kubernetes.labels.app:app AND version:2.0.0" -``` - -Simulate internal server failure: - -``` -$ app=$(minikube service my-app --url) -$ while sleep 1; do curl "$app"/internal-error; done -``` - - -## Cleanup - -Delete Prometheus and my-app Helm releases: - -``` -$ make cleanup -``` +- [rollback based on Prometheus](prometheus.md) +- [rollback based on ElasticSearch](elasticsearch.md) +- [rollback based on Sentry](sentry.md) diff --git a/examples/app/charts/templates/deployment.yaml b/examples/app/charts/templates/deployment.yaml index 0a9f496..0d0607d 100644 --- a/examples/app/charts/templates/deployment.yaml +++ b/examples/app/charts/templates/deployment.yaml @@ -26,6 +26,11 @@ spec: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} ports: - name: http containerPort: 8080 diff --git a/examples/app/charts/values.yaml b/examples/app/charts/values.yaml index 8e6cc51..f52c17f 100644 --- a/examples/app/charts/values.yaml +++ b/examples/app/charts/values.yaml @@ -4,6 +4,12 @@ replicaCount: 1 +# Sentry DSN to send event to, ie: https://:@sentry.io/ +env: + # SENTRY_DSN: + # SENTRY_RELEASE: + # SENTRY_ENVIRONMENT: + image: repository: my-app tag: latest diff --git a/examples/app/go.mod b/examples/app/go.mod index 300b944..3dbe961 100644 --- a/examples/app/go.mod +++ b/examples/app/go.mod @@ -2,12 +2,15 @@ module github.com/ContainerSolutions/helm-monitor/examples/app require ( github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a + 
github.com/certifi/gocertifi v0.0.0-20180905225744-ee1a9a0726d2 // indirect + github.com/getsentry/raven-go v0.0.0-20180903072508-084a9de9eb03 // indirect github.com/golang/protobuf v1.0.0 github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f github.com/gorilla/mux v1.6.2 github.com/heptiolabs/healthcheck v0.0.0-20180807145615-6ff867650f40 github.com/justinas/alice v0.0.0-20171023064455-03f45bd4b7da github.com/matttproud/golang_protobuf_extensions v1.0.0 + github.com/pkg/errors v0.8.0 // indirect github.com/prometheus/client_golang v0.9.0-pre1 github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5 github.com/prometheus/common v0.0.0-20180110214958-89604d197083 diff --git a/examples/app/go.sum b/examples/app/go.sum index ebf5051..fe773ad 100644 --- a/examples/app/go.sum +++ b/examples/app/go.sum @@ -1,5 +1,9 @@ github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a h1:BtpsbiV638WQZwhA98cEZw2BsbnQJrbd0BI7tsy0W1c= github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/certifi/gocertifi v0.0.0-20180905225744-ee1a9a0726d2 h1:MmeatFT1pTPSVb4nkPmBFN/LRZ97vPjsFKsZrU3KKTs= +github.com/certifi/gocertifi v0.0.0-20180905225744-ee1a9a0726d2/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4= +github.com/getsentry/raven-go v0.0.0-20180903072508-084a9de9eb03 h1:G/9fPivTr5EiyqE9OlW65iMRUxFXMGRHgZFGo50uG8Q= +github.com/getsentry/raven-go v0.0.0-20180903072508-084a9de9eb03/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/golang/protobuf v1.0.0 h1:lsek0oXi8iFE9L+EXARyHIjU5rlWIhhTkjDz3vHhWWQ= github.com/golang/protobuf v1.0.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= @@ -15,6 +19,8 @@ github.com/justinas/alice v0.0.0-20171023064455-03f45bd4b7da h1:5y58+OCjoHCYB818 github.com/justinas/alice v0.0.0-20171023064455-03f45bd4b7da/go.mod 
h1:oLH0CmIaxCGXD67VKGR5AacGXZSMznlmeqM8RzPrcY8= github.com/matttproud/golang_protobuf_extensions v1.0.0 h1:YNOwxxSJzSUARoD9KRZLzM9Y858MNGCOACTvCW9TSAc= github.com/matttproud/golang_protobuf_extensions v1.0.0/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/prometheus/client_golang v0.8.0 h1:1921Yw9Gc3iSc4VQh3PIoOqgPCZS7G/4xQNVUp8Mda8= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.0-pre1 h1:AWTOhsOI9qxeirTuA0A4By/1Es1+y9EcCGY6bBZ2fhM= diff --git a/examples/app/main.go b/examples/app/main.go index 98bb5b0..3585aa1 100644 --- a/examples/app/main.go +++ b/examples/app/main.go @@ -13,11 +13,18 @@ import ( "github.com/gorilla/mux" "github.com/heptiolabs/healthcheck" "github.com/justinas/alice" + + // monitoring "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + + // logging "github.com/rs/zerolog" "github.com/rs/zerolog/hlog" "github.com/rs/zerolog/log" + + // events + raven "github.com/getsentry/raven-go" ) var ( @@ -178,5 +185,6 @@ func httpErrorHandler(w http.ResponseWriter, r *http.Request) { } w.WriteHeader(http.StatusInternalServerError) - fmt.Fprintf(w, "Host: %s, Version: %s, Status: Internal Servicer Error\n", hostname, version) + fmt.Fprintf(w, "Host: %s, Version: %s, Status: Internal Service Error\n", hostname, version) + raven.CaptureError(fmt.Errorf("Error triggered, version: %v", version), nil) } diff --git a/examples/elasticsearch.md b/examples/elasticsearch.md new file mode 100644 index 0000000..a14f713 --- /dev/null +++ b/examples/elasticsearch.md @@ -0,0 +1,59 @@ +Rollback based on an ElasticSearch query +======================================== + +> Use helm-monitor to rollback a release based on an ElasticSearch query + +## 
Prepare + +Make sure to follow the steps described in [README.md](README.md) in order to +start Minikube with Tiller installed and pre-build the application. + +Minikube supports the EFK stack via addons; to enable it: + +```bash +$ minikube addons enable efk +``` + +If Minikube was already running, you might need to restart it in order to have +the EFK stack up and running: + +```bash +$ minikube stop +$ minikube start +``` + +Access Kibana: + +```bash +$ minikube service kibana-logging -n kube-system +``` + +## Upgrade and monitor + +```bash +# port forward elasticsearch port locally +$ kubectl port-forward -n kube-system $(kubectl get po -n kube-system -l k8s-app=elasticsearch-logging -o jsonpath="{.items[0].metadata.name}") 9200 + +# release version 2 +$ helm upgrade -i my-app --set image.tag=2.0.0 ./app/charts + +# monitor +$ helm monitor elasticsearch my-app ./elasticsearch-query.json + +# or via Lucene query +$ helm monitor elasticsearch my-app "status:500 AND kubernetes.labels.app:app AND version:2.0.0" +``` + +Simulate internal server failure: + +```bash +$ curl $(minikube service my-app --url)/internal-error +``` + +## Cleanup + +Delete the my-app Helm release: + +```bash +$ helm del --purge my-app +``` diff --git a/examples/prometheus.md b/examples/prometheus.md new file mode 100644 index 0000000..a9aab45 --- /dev/null +++ b/examples/prometheus.md @@ -0,0 +1,61 @@ +Rollback based on a Prometheus query +==================================== + +> Use helm-monitor to rollback a release based on a Prometheus query. In this +example we run a Prometheus instance and a GoLang application in Minikube, +upgrade then monitor for HTTP failures. If the number of 5xx responses reaches a certain +limit, then the application gets automatically rolled back to its previous state. + +## Prepare + +Make sure to follow the steps described in [README.md](README.md) in order to +start Minikube with Tiller installed and pre-build the application.
+ +Install Prometheus: + +```bash +$ helm install \ + --version 7.0.2 \ + --set server.service.type=Loadbalancer \ + --set server.global.scrape_interval=30s \ + --set alertmanager.enabled=false \ + --set kubeStateMetrics.enabled=false \ + --set nodeExporter.enabled=false \ + --set pushgateway.enabled=false \ + --name prometheus \ + stable/prometheus +``` + +Access Prometheus: + +```bash +$ minikube service prometheus-server +``` + +## Upgrade and monitor + +```bash +# get Prometheus endpoint +$ prometheus=$(minikube service prometheus-server --url) + +# release version 2 +$ helm upgrade -i my-app --set image.tag=2.0.0 ./app/charts + +# monitor +$ helm monitor prometheus my-app --prometheus $prometheus 'rate(http_requests_total{code=~"^5.*$",version="2.0.0"}[5m]) > 0' +``` + +In a new terminal, simulate internal server failure: + +```bash +$ app=$(minikube service my-app --url) +$ while sleep 1; do curl "$app"/internal-error; done +``` + +## Cleanup + +Delete the Prometheus and my-app Helm releases: + +```bash +$ helm del --purge prometheus my-app +``` diff --git a/examples/sentry.md b/examples/sentry.md new file mode 100644 index 0000000..756d320 --- /dev/null +++ b/examples/sentry.md @@ -0,0 +1,55 @@ +Rollback based on Sentry events +=============================== + +> Use helm-monitor to rollback a release based on Sentry events + +## Prepare + +Make sure to follow the steps described in [README.md](README.md) in order to +start Minikube with Tiller installed and pre-build the application. + +Install Sentry, increasing the timeout to let Tiller execute the chart hooks: + +```bash +$ helm install --version 0.4.1 --name sentry stable/sentry --timeout 3200 +$ minikube service sentry-sentry +``` + +Open the Sentry UI and configure a new project called "my-project".
+ +Install the example application with the Sentry DSN of that project: +```bash +$ helm upgrade -i my-app \ + --set image.tag=1.0.0 \ + --set env.SENTRY_DSN=$SENTRY_DSN \ + --set env.SENTRY_RELEASE=1.0.0 \ + ./app/charts +``` + +## Upgrade and monitor + +```bash +# get the Sentry endpoint +$ sentry=$(minikube service sentry-sentry --url) + +# release version 2 +$ helm upgrade -i my-app \ + --set image.tag=2.0.0 \ + --set env.SENTRY_DSN=$SENTRY_DSN \ + --set env.SENTRY_RELEASE=2.0.0 \ + ./app/charts + +# monitor +$ helm monitor sentry my-app \ + --api-key $SENTRY_API_KEY \ + --organization sentry \ + --project my-project \ + --sentry $sentry \ + --tag release=2.0.0 \ + --regexp --message 'Error triggered' +``` + +In a new terminal, simulate internal server failure: +```bash +$ curl $(minikube service my-app --url)/internal-error +``` diff --git a/go.mod b/go.mod index c3c69cf..b4b12d2 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/ContainerSolutions/helm-monitor require ( github.com/BurntSushi/toml v0.3.0 // indirect github.com/Masterminds/semver v1.4.2 // indirect + github.com/davecgh/go-spew v1.1.1 github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible github.com/ghodss/yaml v1.0.0 // indirect github.com/gobwas/glob v0.2.3 // indirect diff --git a/go.sum b/go.sum index cee47a0..c6d9050 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Masterminds/semver v1.4.2 h1:WBLTQ37jOCzSLtXNdoo8bNM8876KhNqOKvrlGITgsTc= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible h1:357nGVUC8gSpeSc2Axup8HfrfTLLUfWfCsCUhiQSKIg=
github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= diff --git a/plugin.yaml b/plugin.yaml index f1bbed0..3255b42 100644 --- a/plugin.yaml +++ b/plugin.yaml @@ -1,10 +1,10 @@ --- name: "monitor" -version: "0.2.0" +version: "0.3.0" usage: "monitor and rollback in case of failure based on metrics or logs" description: |- - Query at a given interval a Prometheus or ElasticSearch instance, a rollback - of the release is initiated if the number of item from the result is positive. + Query at a given interval a Prometheus, ElasticSearch or Sentry instance. A + rollback of the release is initiated if the number of result is positive. ignoreFlags: false useTunnel: true command: "$HELM_PLUGIN_DIR/helm-monitor"