diff --git a/Makefile b/Makefile index 377f9b69bd9..b316ffcce9c 100644 --- a/Makefile +++ b/Makefile @@ -266,8 +266,7 @@ github.com/prometheus/prometheus/pkg/testutils=github.com/thanos-io/thanos/pkg/t github.com/prometheus/client_golang/prometheus.{DefaultGatherer,DefBuckets,NewUntypedFunc,UntypedFunc},\ github.com/prometheus/client_golang/prometheus.{NewCounter,NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,\ NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec}=github.com/prometheus/client_golang/prometheus/promauto.{NewCounter,\ -NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec},\ -sync/atomic=go.uber.org/atomic" ./... +NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec}" ./... @$(FAILLINT) -paths "fmt.{Print,Println,Sprint}" -ignore-tests ./... @echo ">> linting all of the Go files GOGC=${GOGC}" @$(GOLANGCI_LINT) run diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 4c9bc39303a..6ee488411dd 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -626,7 +626,7 @@ func runRule( } }() - s := shipper.New(logger, reg, dataDir, bkt, func() labels.Labels { return lset }, metadata.RulerSource, false, allowOutOfOrderUpload) + s := shipper.New(logger, reg, dataDir, bkt, func() labels.Labels { return lset }, metadata.RulerSource, allowOutOfOrderUpload) ctx, cancel := context.WithCancel(context.Background()) diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 35bc41c5ca3..ec11ca47447 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -273,8 +273,12 @@ func runSidecar( return errors.Wrapf(err, "aborting as no external labels found after waiting %s", promReadyTimeout) } - s := shipper.New(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, - conf.shipper.uploadCompacted, conf.shipper.allowOutOfOrderUpload) + var s *shipper.Shipper + if conf.shipper.uploadCompacted { + s 
= shipper.NewWithCompacted(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, conf.shipper.allowOutOfOrderUpload) + } else { + s = shipper.New(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, conf.shipper.allowOutOfOrderUpload) + } return runutil.Repeat(30*time.Second, ctx.Done(), func() error { if uploaded, err := s.Sync(ctx); err != nil { diff --git a/docs/components/sidecar.md b/docs/components/sidecar.md index bf684759547..31616d5f692 100644 --- a/docs/components/sidecar.md +++ b/docs/components/sidecar.md @@ -32,7 +32,7 @@ Prometheus servers connected to the Thanos cluster via the sidecar are subject t If you choose to use the sidecar to also upload data to object storage: * Must specify object storage (`--objstore.*` flags) -* It only uploads uncompacted Prometheus blocks. For compacted blocks, see [Upload compacted blocks](./sidecar.md/#upload-compacted-blocks). +* It only uploads uncompacted Prometheus blocks. For compacted blocks, see [Upload compacted blocks](./sidecar.md/#upload-compacted-blocks-experimental). * The `--storage.tsdb.min-block-duration` and `--storage.tsdb.max-block-duration` must be set to equal values to disable local compaction on order to use Thanos sidecar upload, otherwise leave local compaction on if sidecar just exposes StoreAPI and your retention is normal. The default of `2h` is recommended. Mentioned parameters set to equal values disable the internal Prometheus compaction, which is needed to avoid the uploaded data corruption when Thanos compactor does its job, this is critical for data consistency and should not be ignored if you plan to use Thanos compactor. 
Even though you set mentioned parameters equal, you might observe Prometheus internal metric `prometheus_tsdb_compactions_total` being incremented, don't be confused by that: Prometheus writes initial head block to filesytem via its internal compaction mechanism, but if you have followed recommendations - data won't be modified by Prometheus before the sidecar uploads it. Thanos sidecar will also check sanity of the flags set to Prometheus on the startup and log errors or warning if they have been configured improperly (#838). * The retention is recommended to not be lower than three times the min block duration, so 6 hours. This achieves resilience in the face of connectivity issues to the object storage since all local data will remain available within the Thanos cluster. If connectivity gets restored the backlog of blocks gets uploaded to the object storage. @@ -70,7 +70,7 @@ config: bucket: example-bucket ``` -## Upload compacted blocks +## Upload compacted blocks (EXPERIMENTAL) If you want to migrate from a pure Prometheus setup to Thanos and have to keep the historical data, you can use the flag `--shipper.upload-compacted`. This will also upload blocks that were compacted by Prometheus. Values greater than 1 in the `compaction.level` field of a Prometheus blockβs `meta.json` file indicate level of compaction. 
diff --git a/go.mod b/go.mod index 3562c93cd0d..398b2adb878 100644 --- a/go.mod +++ b/go.mod @@ -54,7 +54,6 @@ require ( github.com/uber/jaeger-lib v2.2.0+incompatible go.elastic.co/apm v1.5.0 go.elastic.co/apm/module/apmot v1.5.0 - go.uber.org/atomic v1.6.0 go.uber.org/automaxprocs v1.2.0 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d diff --git a/pkg/alert/alert.go b/pkg/alert/alert.go index 59dd4c83f57..0a7b4b30e1f 100644 --- a/pkg/alert/alert.go +++ b/pkg/alert/alert.go @@ -14,6 +14,7 @@ import ( "net/url" "path" "sync" + "sync/atomic" "time" "github.com/go-kit/kit/log" @@ -24,7 +25,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/pkg/labels" - "go.uber.org/atomic" "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/tracing" @@ -370,7 +370,7 @@ func (s *Sender) Send(ctx context.Context, alerts []*Alert) { var ( wg sync.WaitGroup - numSuccess atomic.Uint64 + numSuccess uint64 ) for _, am := range s.alertmanagers { for _, u := range am.dispatcher.Endpoints() { @@ -396,14 +396,14 @@ func (s *Sender) Send(ctx context.Context, alerts []*Alert) { s.latency.WithLabelValues(u.Host).Observe(time.Since(start).Seconds()) s.sent.WithLabelValues(u.Host).Add(float64(len(alerts))) - numSuccess.Inc() + atomic.AddUint64(&numSuccess, 1) }) }(am, *u) } } wg.Wait() - if numSuccess.Load() > 0 { + if numSuccess > 0 { return } diff --git a/pkg/prober/http.go b/pkg/prober/http.go index 74bc5f6dcbf..fe273741f1d 100644 --- a/pkg/prober/http.go +++ b/pkg/prober/http.go @@ -6,18 +6,18 @@ package prober import ( "io" "net/http" + "sync/atomic" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "go.uber.org/atomic" ) type check func() bool // HTTPProbe represents health and readiness status of given component, and provides HTTP integration. 
type HTTPProbe struct { - ready atomic.Uint32 - healthy atomic.Uint32 + ready uint32 + healthy uint32 } // NewHTTP returns HTTPProbe representing readiness and healthiness of given component. @@ -49,33 +49,34 @@ func (p *HTTPProbe) handler(logger log.Logger, c check) http.HandlerFunc { // isReady returns true if component is ready. func (p *HTTPProbe) isReady() bool { - ready := p.ready.Load() + ready := atomic.LoadUint32(&p.ready) return ready > 0 } // isHealthy returns true if component is healthy. func (p *HTTPProbe) isHealthy() bool { - healthy := p.healthy.Load() + healthy := atomic.LoadUint32(&p.healthy) return healthy > 0 } // Ready sets components status to ready. func (p *HTTPProbe) Ready() { - p.ready.Swap(1) + atomic.SwapUint32(&p.ready, 1) } // NotReady sets components status to not ready with given error as a cause. func (p *HTTPProbe) NotReady(err error) { - p.ready.Swap(0) + atomic.SwapUint32(&p.ready, 0) } // Healthy sets components status to healthy. func (p *HTTPProbe) Healthy() { - p.healthy.Swap(1) + atomic.SwapUint32(&p.healthy, 1) + } // NotHealthy sets components status to not healthy with given error as a cause. func (p *HTTPProbe) NotHealthy(err error) { - p.healthy.Swap(0) + atomic.SwapUint32(&p.healthy, 0) } diff --git a/pkg/receive/multitsdb.go b/pkg/receive/multitsdb.go index 017224d566b..889544f5411 100644 --- a/pkg/receive/multitsdb.go +++ b/pkg/receive/multitsdb.go @@ -282,7 +282,6 @@ func (t *MultiTSDB) startTSDB(logger log.Logger, tenantID string, tenant *tenant t.bucket, func() labels.Labels { return lbls }, metadata.ReceiveSource, - false, t.allowOutOfOrderUpload, ) } diff --git a/pkg/reloader/reloader.go b/pkg/reloader/reloader.go index 6f406ab9a24..b3b43f98853 100644 --- a/pkg/reloader/reloader.go +++ b/pkg/reloader/reloader.go @@ -288,7 +288,7 @@ func (r *Reloader) apply(ctx context.Context) error { return err } - // filepath.Walk uses Lstat to retrieve os.FileInfo. 
Lstat does not + // filepath.Walk uses Lstat to retriev os.FileInfo. Lstat does not // follow symlinks. Make sure to follow a symlink before checking // if it is a directory. targetFile, err := os.Stat(path) diff --git a/pkg/reloader/reloader_test.go b/pkg/reloader/reloader_test.go index af8b1f25931..30fe5f4c13d 100644 --- a/pkg/reloader/reloader_test.go +++ b/pkg/reloader/reloader_test.go @@ -15,12 +15,12 @@ import ( "path/filepath" "strings" "sync" + "sync/atomic" "testing" "time" "github.com/fortytw2/leaktest" "github.com/thanos-io/thanos/pkg/testutil" - "go.uber.org/atomic" ) func TestReloader_ConfigApply(t *testing.T) { diff --git a/pkg/shipper/shipper.go b/pkg/shipper/shipper.go index 30496e43c04..5c961f4459f 100644 --- a/pkg/shipper/shipper.go +++ b/pkg/shipper/shipper.go @@ -83,9 +83,8 @@ type Shipper struct { allowOutOfOrderUploads bool } -// New creates a new shipper that detects new TSDB blocks in dir and uploads them to -// remote if necessary. It attaches the Thanos metadata section in each meta JSON file. -// If uploadCompacted is enabled, it also uploads compacted blocks which are already in filesystem. +// New creates a new shipper that detects new TSDB blocks in dir and uploads them +// to remote if necessary. It attaches the Thanos metadata section in each meta JSON file. func New( logger log.Logger, r prometheus.Registerer, @@ -93,7 +92,6 @@ func New( bucket objstore.Bucket, lbls func() labels.Labels, source metadata.SourceType, - uploadCompacted bool, allowOutOfOrderUploads bool, ) *Shipper { if logger == nil { @@ -108,10 +106,40 @@ func New( dir: dir, bucket: bucket, labels: lbls, - metrics: newMetrics(r, uploadCompacted), + metrics: newMetrics(r, false), source: source, allowOutOfOrderUploads: allowOutOfOrderUploads, - uploadCompacted: uploadCompacted, + } +} + +// NewWithCompacted creates a new shipper that detects new TSDB blocks in dir and uploads them +// to remote if necessary, including compacted blocks which are already in filesystem. 
+// It attaches the Thanos metadata section in each meta JSON file. +func NewWithCompacted( + logger log.Logger, + r prometheus.Registerer, + dir string, + bucket objstore.Bucket, + lbls func() labels.Labels, + source metadata.SourceType, + allowOutOfOrderUploads bool, +) *Shipper { + if logger == nil { + logger = log.NewNopLogger() + } + if lbls == nil { + lbls = func() labels.Labels { return nil } + } + + return &Shipper{ + logger: logger, + dir: dir, + bucket: bucket, + labels: lbls, + metrics: newMetrics(r, true), + source: source, + uploadCompacted: true, + allowOutOfOrderUploads: allowOutOfOrderUploads, } } diff --git a/pkg/shipper/shipper_e2e_test.go b/pkg/shipper/shipper_e2e_test.go index e0383e4432d..9fac4ca00d9 100644 --- a/pkg/shipper/shipper_e2e_test.go +++ b/pkg/shipper/shipper_e2e_test.go @@ -45,7 +45,7 @@ func TestShipper_SyncBlocks_e2e(t *testing.T) { }() extLset := labels.FromStrings("prometheus", "prom-1") - shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, metricsBucket, func() labels.Labels { return extLset }, metadata.TestSource, false, false) + shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, metricsBucket, func() labels.Labels { return extLset }, metadata.TestSource, false) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -219,7 +219,7 @@ func TestShipper_SyncBlocksWithMigrating_e2e(t *testing.T) { defer upcancel2() testutil.Ok(t, p.WaitPrometheusUp(upctx2)) - shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, true, false) + shipper := NewWithCompacted(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, false) // Create 10 new blocks. 9 of them (non compacted) should be actually uploaded. 
var ( diff --git a/pkg/shipper/shipper_test.go b/pkg/shipper/shipper_test.go index 59c564df3c4..32cdeedf3b7 100644 --- a/pkg/shipper/shipper_test.go +++ b/pkg/shipper/shipper_test.go @@ -26,7 +26,7 @@ func TestShipperTimestamps(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - s := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + s := New(nil, nil, dir, nil, nil, metadata.TestSource, false) // Missing thanos meta file. _, _, err = s.Timestamps() @@ -123,7 +123,7 @@ func TestIterBlockMetas(t *testing.T) { }, })) - shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false) metas, err := shipper.blockMetasFromOldest() testutil.Ok(t, err) testutil.Equals(t, sort.SliceIsSorted(metas, func(i, j int) bool { @@ -162,7 +162,7 @@ func BenchmarkIterBlockMetas(b *testing.B) { }) b.ResetTimer() - shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false) _, err = shipper.blockMetasFromOldest() testutil.Ok(b, err) diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index ef6a3068540..951b83f732f 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -19,6 +19,7 @@ import ( "sort" "strconv" "sync" + "sync/atomic" "testing" "time" @@ -51,7 +52,6 @@ import ( storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/pkg/testutil/e2eutil" - "go.uber.org/atomic" ) var emptyRelabelConfig = make([]*relabel.Config, 0) @@ -1281,10 +1281,10 @@ func benchBucketSeries(t testutil.TB, samplesPerSeries, totalSeries int, request if !t.IsBenchmark() { // Make sure the pool is correctly used. This is expected for 200k numbers. 
- testutil.Equals(t, numOfBlocks, int(chunkPool.(*mockedPool).gets.Load())) + testutil.Equals(t, numOfBlocks, int(chunkPool.(*mockedPool).gets)) // TODO(bwplotka): This is wrong negative for large number of samples (1mln). Investigate. - testutil.Equals(t, 0, int(chunkPool.(*mockedPool).balance.Load())) - chunkPool.(*mockedPool).gets.Store(0) + testutil.Equals(t, 0, int(chunkPool.(*mockedPool).balance)) + chunkPool.(*mockedPool).gets = 0 for _, b := range blocks { // NOTE(bwplotka): It is 4 x 1.0 for 100mln samples. Kind of make sense: long series. @@ -1306,8 +1306,8 @@ func (m fakePool) Put(_ *[]byte) {} type mockedPool struct { parent pool.BytesPool - balance atomic.Uint64 - gets atomic.Uint64 + balance uint64 + gets uint64 } func (m *mockedPool) Get(sz int) (*[]byte, error) { @@ -1315,13 +1315,13 @@ func (m *mockedPool) Get(sz int) (*[]byte, error) { if err != nil { return nil, err } - m.balance.Add(uint64(cap(*b))) - m.gets.Add(uint64(1)) + atomic.AddUint64(&m.balance, uint64(cap(*b))) + atomic.AddUint64(&m.gets, uint64(1)) return b, nil } func (m *mockedPool) Put(b *[]byte) { - m.balance.Sub(uint64(cap(*b))) + atomic.AddUint64(&m.balance, ^uint64(cap(*b)-1)) m.parent.Put(b) } diff --git a/pkg/store/limiter.go b/pkg/store/limiter.go index c60be901e92..1e354721c23 100644 --- a/pkg/store/limiter.go +++ b/pkg/store/limiter.go @@ -5,10 +5,10 @@ package store import ( "sync" + "sync/atomic" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "go.uber.org/atomic" ) type ChunksLimiter interface { @@ -25,7 +25,7 @@ type ChunksLimiterFactory func(failedCounter prometheus.Counter) ChunksLimiter // Limiter is a simple mechanism for checking if something has passed a certain threshold. type Limiter struct { limit uint64 - reserved atomic.Uint64 + reserved uint64 // Counter metric which we will increase if limit is exceeded. 
failedCounter prometheus.Counter @@ -42,7 +42,7 @@ func (l *Limiter) Reserve(num uint64) error { if l.limit == 0 { return nil } - if reserved := l.reserved.Add(num); reserved > l.limit { + if reserved := atomic.AddUint64(&l.reserved, num); reserved > l.limit { // We need to protect from the counter being incremented twice due to concurrency // while calling Reserve(). l.failedOnce.Do(l.failedCounter.Inc) diff --git a/tutorials/katacoda/thanos-pathway.json b/tutorials/katacoda/thanos-pathway.json index 8ad2c1b6417..5e83394ce36 100644 --- a/tutorials/katacoda/thanos-pathway.json +++ b/tutorials/katacoda/thanos-pathway.json @@ -11,7 +11,7 @@ { "course_id": "2-lts", "title": "Intro: Downsampling and unlimited metric retention for Prometheus", - "description": "In progress. Stay Tuned!" + "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos." }, { "course_id": "3-meta-monitoring", diff --git a/tutorials/katacoda/thanos/2-lts/courseBase.sh b/tutorials/katacoda/thanos/2-lts/courseBase.sh index 38c3b2e9675..fb40f8c248a 100644 --- a/tutorials/katacoda/thanos/2-lts/courseBase.sh +++ b/tutorials/katacoda/thanos/2-lts/courseBase.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash -docker pull quay.io/prometheus/prometheus:v2.16.0 -docker pull quay.io/thanos/thanos:v0.13.0 \ No newline at end of file +docker pull dockerenginesonia/thanosbench:v7 +docker pull quay.io/prometheus/prometheus:v2.19.0 +docker pull quay.io/thanos/thanos:v0.15.0 +docker pull minio/minio:RELEASE.2019-01-31T00-31-19Z \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/finish.md b/tutorials/katacoda/thanos/2-lts/finish.md new file mode 100644 index 00000000000..eee4b96ffc7 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/finish.md @@ -0,0 +1,17 @@ +# Summary + +Congratulations! πππ +You completed our second Thanos tutorial. 
Let's summarize what we learned: + +* To preserve the data beyond Prometheus regular retention time, we used an object storage system for backing up our historical data. +* The Thanos Store component acts as a data retrieval proxy for data inside our object storage. +* With Sidecar uploading metric blocks to the object store as soon as it is written to disk, it keeps the βscraperβ (Prometheus with Thanos Sidecar), lightweight. This simplifies maintenance, cost, and system design. +* Thanos Compactor improved query efficiency and also reduced the required storage size. + +See next courses for other tutorials about different deployment models and more advanced features of Thanos! + +### Feedback + +Do you see any bug, typo in the tutorial or you have some feedback for us? + +let us know on https://github.com/thanos-io/thanos or #thanos slack channel linked on https://thanos.io \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/index.json b/tutorials/katacoda/thanos/2-lts/index.json index f177d823405..be58ab264f8 100644 --- a/tutorials/katacoda/thanos/2-lts/index.json +++ b/tutorials/katacoda/thanos/2-lts/index.json @@ -1,12 +1,12 @@ { "title": "Intro: Downsampling and unlimited metric retention for Prometheus", - "description": "Learn how to extend you metric retention in a cheap way with Thanos.", + "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos.", "difficulty": "Beginner", "time": "15-20 Minutes", "details": { "steps": [ { - "title": "Start initial Prometheus servers", + "title": "Configuring Initial Prometheus Server", "text": "step1.md", "verify": "step1-verify.sh" }, @@ -18,17 +18,25 @@ { "title": "Thanos Store Gateway", "text": "step3.md", - "verify": "step3-verify.sh" + "answer": "step3-answer.md" + }, + { + "title": "Thanos Compactor", + "text": "step4.md" } ], "intro": { "text": "intro.md", - "courseData": "courseBase.sh" + "courseData": "courseBase.sh", + "credits": "https://thanos.io" + 
}, + "files": { + "text": "finish.md" } }, "files": [ "prometheus0_eu1.yml", - "prometheus0_us1.yml" + "bucket_storage.yml" ], "environment": { "uilayout": "editor-terminal", @@ -36,8 +44,9 @@ "showdashboard": true, "dashboards": [ {"name": "Prometheus 0 EU1", "port": 9090}, - {"name": "Prometheus 0 US1", "port": 9091} - ] + {"name": "Minio", "port": 9000}, + {"name": "Thanos Query", "port": 29090} + ] }, "backend": { "imageid": "docker-direct" diff --git a/tutorials/katacoda/thanos/2-lts/intro.md b/tutorials/katacoda/thanos/2-lts/intro.md index ccdda9329fa..d5617abe3b6 100644 --- a/tutorials/katacoda/thanos/2-lts/intro.md +++ b/tutorials/katacoda/thanos/2-lts/intro.md @@ -1,14 +1,14 @@ [Thanos](thanos.io) is a set of components that can be composed into a highly available metric system with unlimited storage capacity. It can be added seamlessly on top of existing Prometheus deployments. -Thanos is known as long-term storage for Prometheus. - -Thanos works in cloud native environments like Kubernetes as well as traditional ones. This course uses docker containers with pre-built docker images. +This course uses docker containers with pre-built docker images. In this tutorial, you will learn about : -* Thanos Store Gateway : a metric browser that serves metric blocks stored in S3 via *StoreAPI* gRPC API. -* Querying multiple Prometheus instances from single Prometheus API endpoint. +* How to start uploading your Prometheus data seamlessly to cheap object storage thanks to Thanos sidecar. +* How to further query data in object storage thanks to Thanos Store Gateway: a metric browser that serves metric blocks stored in Object Store via *StoreAPI* gRPC API. +* How to query both fresh and older data in easy way through Thanos Querier. + -Let's jump in! π€ +All of this allows you to keep your metrics in cheap and reliable object storage, allowing virtually unlimited metric retention for Prometheus. -https://thanos.io +Let's jump in! 
diff --git a/tutorials/katacoda/thanos/2-lts/query.png b/tutorials/katacoda/thanos/2-lts/query.png new file mode 100644 index 00000000000..f847df45d6b Binary files /dev/null and b/tutorials/katacoda/thanos/2-lts/query.png differ diff --git a/tutorials/katacoda/thanos/2-lts/step1-verify.sh b/tutorials/katacoda/thanos/2-lts/step1-verify.sh index d8247a5787b..91cffc97c5d 100644 --- a/tutorials/katacoda/thanos/2-lts/step1-verify.sh +++ b/tutorials/katacoda/thanos/2-lts/step1-verify.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 -curl -s 127.0.0.1:9091/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1 echo "\"done\"" \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step1.md b/tutorials/katacoda/thanos/2-lts/step1.md index 435b9e1d7d9..63581995cfe 100644 --- a/tutorials/katacoda/thanos/2-lts/step1.md +++ b/tutorials/katacoda/thanos/2-lts/step1.md @@ -1,20 +1,35 @@ # Step 1 - Initial Prometheus Setup -Thanos builds upon vanilla Prometheus instances which states it seamlessly integrates into existing Prometheus setups. +Thanos is a set of components that adds high availability to Prometheus installations, unlimited metrics retention and global querying across clusters. -In this tutorial, we will be using two Prometheus Servers. +Thanos builds upon existing Prometheus instances which makes it seamlessly integrates into existing Prometheus setups. -* We have one Prometheus server in eu1 region and one Prometheus servers scraping the same target in us1 region. +In this tutorial, we will mimic the usual state with a Prometheus server running for several months. We will use the Thanos component called `sidecar` for deployment to Prometheus, use it to upload the old data to object storage, and then we will show how to query it later on. + +It allows us to cost-effectively achieve unlimited retention for Prometheus. 
Let's start this initial Prometheus setup, ready? -## Prometheus Configuration Files +## Generate Artifical Metric Data + +Before starting Prometheus, let's generate some artificial data. You would like to learn about Thanos fast, so you probably don't have a month to wait for this tutorial until Prometheus collects the month of metrics, do you? (: + +We will use our handy [thanosbench](link here) project to do so. + +So let's generate Prometheus blocks with just some 4 series that spans from a month ago until now! + +Execute the following command: + +``` +mkdir -p test && docker run -i dockerenginesonia/thanosbench:v7 block plan -p realistic-key-k8s-1d-small --labels 'cluster="one"' --max-time 2019-10-18T00:00:00Z | docker run -v /root/test:/test -i dockerenginesonia/thanosbench:v7 block gen --output.dir test +```{{execute}} -Here, we will prepare configuration files for all Prometheus instances. +## Prometheus Configuration Files -Click `Copy To Editor` for each config to propagate the configs to each file. +Here, we will prepare configuration files for the Prometheus instance that will run with our pre-generated data. +It will also scrape our components we will use in this tutorial. -First, for the EU Prometheus server that scrapes itself: +Click `Copy To Editor` for config to propagate the configs to file.
global: @@ -28,79 +43,113 @@ scrape_configs: - job_name: 'prometheus' static_configs: - targets: ['127.0.0.1:9090'] -- -Second, for the US Prometheus server that scrapes the same target: - -
-global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - cluster: us1 - replica: 0 - -scrape_configs: - - job_name: 'prometheus' + - job_name: 'sidecar' static_configs: - - targets: ['127.0.0.1:9091'] + - targets: ['127.0.0.1:19090'] + - job_name: 'minio' + metrics_path: /minio/prometheus/metrics + static_configs: + - targets: ['127.0.0.1:9000'] + - job_name: 'store_gateway' + static_configs: + - targets: ['127.0.0.1:19095']## Starting Prometheus Instances -Let's now start two containers representing our two different Prometheus instances. +Let's now start the container representing Prometheus instance. + +Note `-v $(pwd)/test:/prometheus \` and `--storage.tsdb.path=/prometheus` that allows us to place our generated data in Prometheus data directory. -Execute following commands: +Execute the following commands: ### Prepare "persistent volumes" -``` -mkdir -p prometheus0_eu1_data prometheus0_us1_data -```{{execute}} ### Deploying "EU1" ``` docker run -d --net=host --rm \ -v $(pwd)/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \ - -v $(pwd)/prometheus0_eu1_data:/prometheus \ + -v $(pwd)/test:/prometheus \ -u root \ --name prometheus-0-eu1 \ - quay.io/prometheus/prometheus:v2.14.0 \ + quay.io/prometheus/prometheus:v2.19.0 \ --config.file=/etc/prometheus/prometheus.yml \ --storage.tsdb.path=/prometheus \ + --storage.tsdb.max-block-duration=2h \ + --storage.tsdb.min-block-duration=2h \ --web.listen-address=:9090 \ --web.external-url=https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com \ --web.enable-lifecycle \ --web.enable-admin-api && echo "Prometheus EU1 started!" 
```{{execute}} -and +## Setup Verification -### Deploying "US1" +Once started you should be able to reach the Prometheus instance here: + +* [Prometheus-0 EU1](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/) + +# Installing Thanos sidecar + +At the end of this step, we will have running Prometheus instance with sidecar deployed. You can read more about sidecar [here](https://thanos.io/tip/components/sidecar.md/). + + +## Deployment + +Click snippets to add a sidecar to the Prometheus instance. + +### Adding sidecar to "EU1" Prometheus ``` docker run -d --net=host --rm \ - -v $(pwd)/prometheus0_us1.yml:/etc/prometheus/prometheus.yml \ - -v $(pwd)/prometheus0_us1_data:/prometheus \ + -v $(pwd)/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \ + -v $(pwd)/test:/prometheus \ + --name prometheus-0-sidecar-eu1 \ -u root \ - --name prometheus-0-us1 \ - quay.io/prometheus/prometheus:v2.14.0 \ - --config.file=/etc/prometheus/prometheus.yml \ - --storage.tsdb.path=/prometheus \ - --web.listen-address=:9091 \ - --web.external-url=https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com \ - --web.enable-lifecycle \ - --web.enable-admin-api && echo "Prometheus 0 US1 started!" + quay.io/thanos/thanos:v0.15.0 \ + sidecar \ + --http-address 0.0.0.0:19090 \ + --grpc-address 0.0.0.0:19190 \ + --reloader.config-file /etc/prometheus/prometheus.yml \ + --prometheus.url http://127.0.0.1:9090 && echo "Started sidecar for Prometheus 0 EU1" ```{{execute}} -## Setup Verification +Now, you should have a sidecar running well. Since now Prometheus has access to sidecar metrics we can query for [`thanos_sidecar_prometheus_up`](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/graph?g0.expr=thanos_sidecar_prometheus_up&g0.tab=1) to check if sidecar has access to Prometheus. 
-Once started you should be able to reach all of those Prometheus instances: +## Problem statement: -* [Prometheus-0 EU1](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/) -* [Prometheus-1 US1](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/) +Let's try to play with this setup a bit. + +Grab a coffee (or your favorite tasty beverage). Let's verify whether the blocks were uploaded before or not? Interesting? π + +Tip: Look for `prometheus_tsdb_reloads_total` metric π΅οΈβ + +* Check here `prometheus_tsdb_reloads_total` + +## Deploying Thanos Querier + +Let' now start the Query component. As you remember [Thanos sidecar](https://thanos.io/tip/components/query.md/) exposes `StoreAPI` +so we will make sure we point the Querier to the gRPC endpoints of the sidecar: + +Click below snippet to start the Querier. + +``` +docker run -d --net=host --rm \ + --name querier \ + quay.io/thanos/thanos:v0.15.0 \ + query \ + --http-address 0.0.0.0:29090 \ + --query.replica-label replica \ + --store 127.0.0.1:19190 \ + --store 127.0.0.1:10906 && echo "Started Thanos Querier" +```{{execute}} + +## Setup verification + +Thanos Querier exposes very similar UI to the Prometheus, but on top of many `StoreAPIs, you wish to connect to. -## Next +To check if the Querier works as intended let's look on [Querier UI `Store` page](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/stores). -Voila ! We have 2 Prometheus instances running. In the next steps, we will learn how to transform Prometheus servers to solve various problems that will result into long term retention. \ No newline at end of file +This should list the sidecar, including the external label. 
diff --git a/tutorials/katacoda/thanos/2-lts/step2-verify.sh b/tutorials/katacoda/thanos/2-lts/step2-verify.sh index 26beeb23e22..cb969231c82 100644 --- a/tutorials/katacoda/thanos/2-lts/step2-verify.sh +++ b/tutorials/katacoda/thanos/2-lts/step2-verify.sh @@ -1,9 +1,13 @@ #!/usr/bin/env bash curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 -curl -s 127.0.0.1:9091/metrics > /dev/null || exit 1 curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 -curl -s 127.0.0.1:19091/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 echo "\"done\"" \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step2.md b/tutorials/katacoda/thanos/2-lts/step2.md index 9a52f28dcfa..75aff0ce7e9 100644 --- a/tutorials/katacoda/thanos/2-lts/step2.md +++ b/tutorials/katacoda/thanos/2-lts/step2.md @@ -1,81 +1,68 @@ -# Step 2 - Installing Thanos sidecar +# Step 2 - Object Storage Configuration -At the end of this step, we will have two running Prometheus instances with sidecar each. You can read more about sidecar [here](https://thanos.io/components/sidecar.md/). +In this step, we will configure the object store and change sidecar to upload to the object-store. -## Installation +## Running Minio -Here, we will modify our configuration files to include the sidecars. +Now, execute the command -Click `Copy To Editor` for each config to propagate the configs to each file. +``` +mkdir -p /storage/thanos && docker run -d --name minio -v /storage:/data -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" minio/minio:RELEASE.2019-01-31T00-31-19Z server /data +```{{execute}} -
-global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - cluster: eu1 - replica: 0 +## Verification -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['127.0.0.1:9090'] - - job_name: 'sidecar' - static_configs: - - targets: ['127.0.0.1:19090'] -+Now, you should have minio running well. -and - -
-global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - cluster: us1 - replica: 0 - -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['127.0.0.1:9091'] - - job_name: 'sidecar' - static_configs: - - targets: ['127.0.0.1:19091'] -+To check if the Minio is working as intended, let's check out [here](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/) + +Enter the credentials as mentioned below: + +**Access Key** = `minio` +**Secret Key** = `minio123` + +## Configuration : -## Deployment +The configuration file content : -Click snippets to add sidecars to each Prometheus instance. +Click `Copy To Editor` for config to propagate the configs to the file `bucket_storage.yml`: -### Adding sidecar to "EU1" Prometheus +
+type: S3 +config: + bucket: "thanos" + endpoint: "127.0.0.1:9000" + insecure: true + signature_version2: true + access_key: "minio" + secret_key: "minio123" ++ +Before moving forward, we need to stop the `sidecar container` and we can do so by executing the following command: ``` -docker run -d --net=host --rm \ - -v $(pwd)/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \ - --name prometheus-0-sidecar-eu1 \ - -u root \ - quay.io/thanos/thanos:v0.12.2 \ - sidecar \ - --http-address 0.0.0.0:19090 \ - --grpc-address 0.0.0.0:19190 \ - --reloader.config-file /etc/prometheus/prometheus.yml \ - --prometheus.url http://127.0.0.1:9090 && echo "Started sidecar for Prometheus 0 EU1" +docker stop prometheus-0-sidecar-eu1 ```{{execute}} -### Adding sidecar to "US1" Prometheus +Now, execute the following command : ``` docker run -d --net=host --rm \ - -v $(pwd)/prometheus0_us1.yml:/etc/prometheus/prometheus.yml \ - --name prometheus-0-sidecar-us1 \ + -v $(pwd)/bucket_storage.yml:/etc/prometheus/bucket_storage.yml \ + -v $(pwd)/test:/prometheus \ + --name sidecar \ -u root \ - quay.io/thanos/thanos:v0.12.2 \ + quay.io/thanos/thanos:v0.15.0 \ sidecar \ - --http-address 0.0.0.0:19091 \ - --grpc-address 0.0.0.0:19191 \ - --reloader.config-file /etc/prometheus/prometheus.yml \ - --prometheus.url http://127.0.0.1:9091 && echo "Started sidecar for Prometheus 0 US1" + --tsdb.path /prometheus \ + --objstore.config-file /etc/prometheus/bucket_storage.yml \ + --prometheus.url http://127.0.0.1:9090 \ + --http-address 0.0.0.0:19090 \ + --grpc-address 0.0.0.0:19190 && echo "Store API exposed" ```{{execute}} -Now, you should have sidecars running well. Since now Prometheus has access to sidecar metrics we can query for [`thanos_sidecar_prometheus_up`](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/graph?g0.expr=thanos_sidecar_prometheus_up&g0.tab=1) to check if sidecar has access to Prometheus. 
\ No newline at end of file +The flag `--objstore.config-file` loads all the required configuration from the file to ship the TSDB blocks to an object storage bucket, the storage endpoints, and the credentials used. + +## Verification + +We can check whether the data is uploaded into the `thanos` bucket by visiting [Minio](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/). The stored metrics will also be available in the object storage. \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3-answer.md b/tutorials/katacoda/thanos/2-lts/step3-answer.md new file mode 100644 index 00000000000..06e48a1d847 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step3-answer.md @@ -0,0 +1,19 @@ +## Answer + +**In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?** + +This is handled by having unique **external labels** for all Prometheus instances, sidecars, and HA replicas. To indicate that all replicas are scraping the same targets, they differ in only one label. + +For instance, consider the situation below: + +``` +First: +"cluster": "prod1" +"replica": "0" + +Second: +"cluster":"prod1" +"replica": "1" +``` + +There is no problem with storing them since the label sets are **unique**.
\ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3-verify.sh b/tutorials/katacoda/thanos/2-lts/step3-verify.sh index f3a07bf6759..979b3df4206 100644 --- a/tutorials/katacoda/thanos/2-lts/step3-verify.sh +++ b/tutorials/katacoda/thanos/2-lts/step3-verify.sh @@ -1 +1,15 @@ -# add verification as per step-3 \ No newline at end of file +#!/usr/bin/env bash + +curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +echo "\"done\"" \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3.md b/tutorials/katacoda/thanos/2-lts/step3.md index 2a76ec8435b..05e51e6b392 100644 --- a/tutorials/katacoda/thanos/2-lts/step3.md +++ b/tutorials/katacoda/thanos/2-lts/step3.md @@ -1,6 +1,6 @@ -# Step 3 - Installing Thanos Store +# Step 3 - Installing the Thanos Store -In this step, we will learn about Thanos Store Gateway, how to start and what problems are solved by it. +In this step, we will learn about Thanos Store Gateway, how to start, and what problems are solved by it. ## Thanos Components @@ -8,9 +8,9 @@ Thanos is a single Go binary capable to run in different modes. Each mode repres Let's take a look at all the Thanos commands: -```docker run --rm quay.io/thanos/thanos:v0.12.2 --help``` +```docker run --rm quay.io/thanos/thanos:v0.15.0 --help```{{execute}} -You should see multiple commands that solves different purposes, a block storage based long-term storage for Prometheus. +You should see multiple commands that solve different purposes, block storage based long-term storage for Prometheus. 
In this step we will focus on thanos `store gateway`: @@ -27,94 +27,47 @@ In this step we will focus on thanos `store gateway`: This data is generally safe to delete across restarts at the cost of increased startup times. -You can read more about [Store](https://thanos.io/components/store.md/) here. - -## Installation - -Here, we will modify our configuration files to include the store gateway and querier. - -Click `Copy To Editor` for each config to propagate the configs to each file. - -
-global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - cluster: eu1 - replica: 0 - -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['127.0.0.1:9090'] - - job_name: 'sidecar' - static_configs: - - targets: ['127.0.0.1:19090'] - - job_name: 'store_gateway' - static_configs: - - targets: ['127.0.0.1:19090'] - - job_name: 'querier' - static_configs: - - targets: ['127.0.0.1:19090'] -- -and - -
-global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - cluster: us1 - replica: 0 - -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['127.0.0.1:9091'] - - job_name: 'sidecar' - static_configs: - - targets: ['127.0.0.1:19091'] - - job_name: 'store_gateway' - static_configs: - - targets: ['127.0.0.1:19091'] - - job_name: 'querier' - static_configs: - - targets: ['127.0.0.1:19091'] -+You can read more about [Store](https://thanos.io/tip/components/store.md/) here. ## Deployment -Click snippets to deploy thanos store to each Prometheus instance. +Click on the snippet to deploy thanos store to the running Prometheus instance. ### Deploying store to "EU1" Prometheus ``` docker run -d --net=host --rm \ - -v $(pwd)/prometheus0_eu1.yml:/etc/prometheus/bucket.yml \ - --name prometheus-0-store-eu1 \ - -u root \ + -v $(pwd)/bucket_storage.yml:/etc/prometheus/bucket_storage.yml \ + -v $(pwd)/test:/prometheus \ + --name thanos-store \ + quay.io/thanos/thanos:v0.15.0 \ store \ - --http-address 0.0.0.0:19090 \ - --grpc-address 0.0.0.0:19190 \ - --objstore.config-file=/etc/prometheus/bucket.yml \ - --prometheus.url http://127.0.0.1:9090 && echo "Deployed store gateway for Prometheus 0 EU1" + --data-dir /prometheus \ + --objstore.config-file /etc/prometheus/bucket_storage.yml \ + --http-address 0.0.0.0:10905 \ + --grpc-address 0.0.0.0:10906 && echo "Thanos Store added" ```{{execute}} -### Deploying store to "US1" Prometheus +## How to query Thanos store data? 
-``` -docker run -d --net=host --rm \ - -v $(pwd)/prometheus0_us1.yml:/etc/prometheus/bucket.yml \ - --name prometheus-0-store-us1 \ - -u root \ - store \ - --http-address 0.0.0.0:19091 \ - --grpc-address 0.0.0.0:19191 \ - --objstore.config-file=/etc/prometheus/bucket.yml \ - --prometheus.url http://127.0.0.1:9091 && echo "Deployed store gateway for Prometheus 0 US1" -```{{execute}} +In this step, we will see how we can query Thanos store data which has access to historical data from the `thanos` bucket, and let's play with this setup a bit. + +Click on the [Querier UI `Graph` page](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/graph) and try querying data for a year or two by inserting metrics [k8s_app_metric0](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/graph?g0.expr=k8s_app_metric0&g0.tab=1). Make sure `deduplication` is selected and you will be able to discover all the data fetched by Thanos store. + +![](https://github.com/soniasingla/thanos/raw/master/tutorials/katacoda/thanos/2-lts/query.png) + +Also, you can check all the active endpoints located by thanos-store by clicking on [Stores](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/stores). + +We've added Thanos Query, a web and API frontend that can query a Prometheus instance and Thanos Store at the same time, which gives transparent access to the archived blocks and real-time metrics. The vanilla PromQL Prometheus engine used for evaluating the query deduces what time series and for what time ranges we need to fetch the data. Also, StoreAPIs propagate external labels and the time range they have data for, so we can do basic filtering on this. However, if you don't specify any of these in the query (only "up" series) the querier concurrently asks all the StoreAPI servers. It might cause a duplication of results between sidecar and store data. + +## Question Time? 
π€ + +In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage? + +Think over this π + +To see the answer to this question click SHOW SOLUTION below. ## Next -Voila ! In the next step, we will talk about downsampling, why it's important and see how queries are still working served by Thanos Store Gateway. \ No newline at end of file +Voila! In the next step, we will talk about the Thanos Compactor, its retention capabilities, and how it improves query efficiency and reduces the required storage size. \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step4-verify.sh b/tutorials/katacoda/thanos/2-lts/step4-verify.sh new file mode 100644 index 00000000000..f5ee7ba4fa6 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step4-verify.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +echo "\"done\"" \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step4.md index ac9246ca529..8c437b86c11 100644 --- a/tutorials/katacoda/thanos/2-lts/step4.md +++ b/tutorials/katacoda/thanos/2-lts/step4.md @@ -1,5 +1,41 @@ -### TODO: Show that queries are working, served by Thanos Store Gateway. +# Step 4 - Thanos Compactor -Downsampling is the most important and required feature - itβs the ability to keep long term metrics with fewer number of samples. +In this step, we will install the Thanos Compactor, which applies the compaction procedure of the Prometheus 2.0 storage engine to block data in object storage.
-If we don't perform that querying metrics with a high number of time series might cause issues because of the amount of data fetched by store. \ No newline at end of file +Before moving forward, let's take a closer look at what the `Compactor` component does: + +## Compactor + +The `Compactor` is an essential component that operates on a single object storage bucket to compact, down-sample, and apply retention to the TSDB blocks held inside, thus making queries on historical data more efficient. It creates aggregates of old metrics (based upon the rules). + +It is also responsible for downsampling of data, performing 5m downsampling after 40 hours, and 1h downsampling after 10 days. + +If you want to know more about the Thanos Compactor, jump [here](https://thanos.io/tip/components/compact.md/). + +**Note**: Thanos Compactor is mandatory if you use object storage, otherwise Thanos Store Gateway will be too slow without using a compactor. + +## Deploying Thanos Compactor + +Click below snippet to start the Compactor. + +``` +docker run -d --net=host --rm \ + -v $(pwd)/bucket_storage.yml:/etc/prometheus/bucket_storage.yml \ + --name thanos-compact \ + quay.io/thanos/thanos:v0.15.0 \ + compact \ + --data-dir /prometheus \ + --objstore.config-file \ + /etc/prometheus/bucket_storage.yml \ + --http-address 0.0.0.0:19092 +```{{execute}} + +## Unlimited Retention - Not Challenging anymore? + +As we know, Prometheus has very short retention, which makes it almost stateless. This requires local compaction to be disabled to avoid any potential races with global compaction done by the Thanos Compactor, otherwise it will result in consuming lots of memory during the initial sync. + +## Next + +Awesome work! Feel free to play with the setup π€ + +Once done, hit `Continue` for summary. \ No newline at end of file