diff --git a/Makefile b/Makefile index 377f9b69bd9..b316ffcce9c 100644 --- a/Makefile +++ b/Makefile @@ -266,8 +266,7 @@ github.com/prometheus/prometheus/pkg/testutils=github.com/thanos-io/thanos/pkg/t github.com/prometheus/client_golang/prometheus.{DefaultGatherer,DefBuckets,NewUntypedFunc,UntypedFunc},\ github.com/prometheus/client_golang/prometheus.{NewCounter,NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,\ NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec}=github.com/prometheus/client_golang/prometheus/promauto.{NewCounter,\ -NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec},\ -sync/atomic=go.uber.org/atomic" ./... +NewCounterVec,NewCounterVec,NewGauge,NewGaugeVec,NewGaugeFunc,NewHistorgram,NewHistogramVec,NewSummary,NewSummaryVec}" ./... @$(FAILLINT) -paths "fmt.{Print,Println,Sprint}" -ignore-tests ./... @echo ">> linting all of the Go files GOGC=${GOGC}" @$(GOLANGCI_LINT) run diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 4c9bc39303a..6ee488411dd 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -626,7 +626,7 @@ func runRule( } }() - s := shipper.New(logger, reg, dataDir, bkt, func() labels.Labels { return lset }, metadata.RulerSource, false, allowOutOfOrderUpload) + s := shipper.New(logger, reg, dataDir, bkt, func() labels.Labels { return lset }, metadata.RulerSource, allowOutOfOrderUpload) ctx, cancel := context.WithCancel(context.Background()) diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 35bc41c5ca3..ec11ca47447 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -273,8 +273,12 @@ func runSidecar( return errors.Wrapf(err, "aborting as no external labels found after waiting %s", promReadyTimeout) } - s := shipper.New(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, - conf.shipper.uploadCompacted, conf.shipper.allowOutOfOrderUpload) + var s *shipper.Shipper + if conf.shipper.uploadCompacted { + s = shipper.NewWithCompacted(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, conf.shipper.allowOutOfOrderUpload) + } else { + s = shipper.New(logger, reg, conf.tsdb.path, bkt, m.Labels, metadata.SidecarSource, conf.shipper.allowOutOfOrderUpload) + } return runutil.Repeat(30*time.Second, ctx.Done(), func() error { if uploaded, err := s.Sync(ctx); err != nil { diff --git a/docs/components/sidecar.md b/docs/components/sidecar.md index bf684759547..31616d5f692 100644 --- a/docs/components/sidecar.md +++ b/docs/components/sidecar.md @@ -32,7 +32,7 @@ Prometheus servers connected to the Thanos cluster via the sidecar are subject t If you choose to use the sidecar to also upload data to object storage: * Must specify object storage (`--objstore.*` flags) -* It only uploads uncompacted Prometheus blocks. For compacted blocks, see [Upload compacted blocks](./sidecar.md/#upload-compacted-blocks). +* It only uploads uncompacted Prometheus blocks. For compacted blocks, see [Upload compacted blocks](./sidecar.md/#upload-compacted-blocks-experimental). * The `--storage.tsdb.min-block-duration` and `--storage.tsdb.max-block-duration` must be set to equal values to disable local compaction on order to use Thanos sidecar upload, otherwise leave local compaction on if sidecar just exposes StoreAPI and your retention is normal. The default of `2h` is recommended. 
Mentioned parameters set to equal values disable the internal Prometheus compaction, which is needed to avoid the uploaded data corruption when Thanos compactor does its job, this is critical for data consistency and should not be ignored if you plan to use Thanos compactor. Even though you set mentioned parameters equal, you might observe Prometheus internal metric `prometheus_tsdb_compactions_total` being incremented, don't be confused by that: Prometheus writes initial head block to filesytem via its internal compaction mechanism, but if you have followed recommendations - data won't be modified by Prometheus before the sidecar uploads it. Thanos sidecar will also check sanity of the flags set to Prometheus on the startup and log errors or warning if they have been configured improperly (#838). * The retention is recommended to not be lower than three times the min block duration, so 6 hours. This achieves resilience in the face of connectivity issues to the object storage since all local data will remain available within the Thanos cluster. If connectivity gets restored the backlog of blocks gets uploaded to the object storage. @@ -70,7 +70,7 @@ config: bucket: example-bucket ``` -## Upload compacted blocks +## Upload compacted blocks (EXPERIMENTAL) If you want to migrate from a pure Prometheus setup to Thanos and have to keep the historical data, you can use the flag `--shipper.upload-compacted`. This will also upload blocks that were compacted by Prometheus. Values greater than 1 in the `compaction.level` field of a Prometheus blockโ€™s `meta.json` file indicate level of compaction. diff --git a/go.mod b/go.mod index 3562c93cd0d..398b2adb878 100644 --- a/go.mod +++ b/go.mod @@ -54,7 +54,6 @@ require ( github.com/uber/jaeger-lib v2.2.0+incompatible go.elastic.co/apm v1.5.0 go.elastic.co/apm/module/apmot v1.5.0 - go.uber.org/atomic v1.6.0 go.uber.org/automaxprocs v1.2.0 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d diff --git a/pkg/alert/alert.go b/pkg/alert/alert.go index 59dd4c83f57..0a7b4b30e1f 100644 --- a/pkg/alert/alert.go +++ b/pkg/alert/alert.go @@ -14,6 +14,7 @@ import ( "net/url" "path" "sync" + "sync/atomic" "time" "github.com/go-kit/kit/log" @@ -24,7 +25,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/pkg/labels" - "go.uber.org/atomic" "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/tracing" @@ -370,7 +370,7 @@ func (s *Sender) Send(ctx context.Context, alerts []*Alert) { var ( wg sync.WaitGroup - numSuccess atomic.Uint64 + numSuccess uint64 ) for _, am := range s.alertmanagers { for _, u := range am.dispatcher.Endpoints() { @@ -396,14 +396,14 @@ func (s *Sender) Send(ctx context.Context, alerts []*Alert) { s.latency.WithLabelValues(u.Host).Observe(time.Since(start).Seconds()) s.sent.WithLabelValues(u.Host).Add(float64(len(alerts))) - numSuccess.Inc() + atomic.AddUint64(&numSuccess, 1) }) }(am, *u) } } wg.Wait() - if numSuccess.Load() > 0 { + if numSuccess > 0 { return } diff --git a/pkg/prober/http.go b/pkg/prober/http.go index 74bc5f6dcbf..fe273741f1d 100644 --- a/pkg/prober/http.go +++ b/pkg/prober/http.go @@ -6,18 +6,18 @@ package prober import ( "io" "net/http" + "sync/atomic" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "go.uber.org/atomic" ) type check func() bool // HTTPProbe represents health and readiness status of given component, and provides HTTP 
integration.
 type HTTPProbe struct {
-	ready   atomic.Uint32
-	healthy atomic.Uint32
+	ready   uint32
+	healthy uint32
 }
 
 // NewHTTP returns HTTPProbe representing readiness and healthiness of given component.
@@ -49,33 +49,34 @@ func (p *HTTPProbe) handler(logger log.Logger, c check) http.HandlerFunc {
 
 // isReady returns true if component is ready.
 func (p *HTTPProbe) isReady() bool {
-	ready := p.ready.Load()
+	ready := atomic.LoadUint32(&p.ready)
 	return ready > 0
 }
 
 // isHealthy returns true if component is healthy.
 func (p *HTTPProbe) isHealthy() bool {
-	healthy := p.healthy.Load()
+	healthy := atomic.LoadUint32(&p.healthy)
 	return healthy > 0
 }
 
 // Ready sets components status to ready.
 func (p *HTTPProbe) Ready() {
-	p.ready.Swap(1)
+	atomic.SwapUint32(&p.ready, 1)
 }
 
 // NotReady sets components status to not ready with given error as a cause.
 func (p *HTTPProbe) NotReady(err error) {
-	p.ready.Swap(0)
+	atomic.SwapUint32(&p.ready, 0)
 }
 
 // Healthy sets components status to healthy.
 func (p *HTTPProbe) Healthy() {
-	p.healthy.Swap(1)
+	atomic.SwapUint32(&p.healthy, 1)
+
 }
 
 // NotHealthy sets components status to not healthy with given error as a cause.
 func (p *HTTPProbe) NotHealthy(err error) {
-	p.healthy.Swap(0)
+	atomic.SwapUint32(&p.healthy, 0)
 }
diff --git a/pkg/receive/multitsdb.go b/pkg/receive/multitsdb.go
index 017224d566b..889544f5411 100644
--- a/pkg/receive/multitsdb.go
+++ b/pkg/receive/multitsdb.go
@@ -282,7 +282,6 @@ func (t *MultiTSDB) startTSDB(logger log.Logger, tenantID string, tenant *tenant
 		t.bucket,
 		func() labels.Labels { return lbls },
 		metadata.ReceiveSource,
-		false,
 		t.allowOutOfOrderUpload,
 	)
 }
diff --git a/pkg/reloader/reloader.go b/pkg/reloader/reloader.go
index 6f406ab9a24..b3b43f98853 100644
--- a/pkg/reloader/reloader.go
+++ b/pkg/reloader/reloader.go
@@ -288,7 +288,7 @@ func (r *Reloader) apply(ctx context.Context) error {
 		return err
 	}
 
-	// filepath.Walk uses Lstat to retrieve os.FileInfo. Lstat does not
+	// filepath.Walk uses Lstat to retrieve os.FileInfo. Lstat does not
 	// follow symlinks. Make sure to follow a symlink before checking
 	// if it is a directory.
 	targetFile, err := os.Stat(path)
diff --git a/pkg/reloader/reloader_test.go b/pkg/reloader/reloader_test.go
index af8b1f25931..30fe5f4c13d 100644
--- a/pkg/reloader/reloader_test.go
+++ b/pkg/reloader/reloader_test.go
@@ -15,12 +15,12 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
 	"github.com/fortytw2/leaktest"
 	"github.com/thanos-io/thanos/pkg/testutil"
-	"go.uber.org/atomic"
 )
 
 func TestReloader_ConfigApply(t *testing.T) {
diff --git a/pkg/shipper/shipper.go b/pkg/shipper/shipper.go
index 30496e43c04..5c961f4459f 100644
--- a/pkg/shipper/shipper.go
+++ b/pkg/shipper/shipper.go
@@ -83,9 +83,8 @@ type Shipper struct {
 	allowOutOfOrderUploads bool
 }
 
-// New creates a new shipper that detects new TSDB blocks in dir and uploads them to
-// remote if necessary. It attaches the Thanos metadata section in each meta JSON file.
-// If uploadCompacted is enabled, it also uploads compacted blocks which are already in filesystem.
+// New creates a new shipper that detects new TSDB blocks in dir and uploads them
+// to remote if necessary. It attaches the Thanos metadata section in each meta JSON file.
func New( logger log.Logger, r prometheus.Registerer, @@ -93,7 +92,6 @@ func New( bucket objstore.Bucket, lbls func() labels.Labels, source metadata.SourceType, - uploadCompacted bool, allowOutOfOrderUploads bool, ) *Shipper { if logger == nil { @@ -108,10 +106,40 @@ func New( dir: dir, bucket: bucket, labels: lbls, - metrics: newMetrics(r, uploadCompacted), + metrics: newMetrics(r, false), source: source, allowOutOfOrderUploads: allowOutOfOrderUploads, - uploadCompacted: uploadCompacted, + } +} + +// NewWithCompacted creates a new shipper that detects new TSDB blocks in dir and uploads them +// to remote if necessary, including compacted blocks which are already in filesystem. +// It attaches the Thanos metadata section in each meta JSON file. +func NewWithCompacted( + logger log.Logger, + r prometheus.Registerer, + dir string, + bucket objstore.Bucket, + lbls func() labels.Labels, + source metadata.SourceType, + allowOutOfOrderUploads bool, +) *Shipper { + if logger == nil { + logger = log.NewNopLogger() + } + if lbls == nil { + lbls = func() labels.Labels { return nil } + } + + return &Shipper{ + logger: logger, + dir: dir, + bucket: bucket, + labels: lbls, + metrics: newMetrics(r, true), + source: source, + uploadCompacted: true, + allowOutOfOrderUploads: allowOutOfOrderUploads, } } diff --git a/pkg/shipper/shipper_e2e_test.go b/pkg/shipper/shipper_e2e_test.go index e0383e4432d..9fac4ca00d9 100644 --- a/pkg/shipper/shipper_e2e_test.go +++ b/pkg/shipper/shipper_e2e_test.go @@ -45,7 +45,7 @@ func TestShipper_SyncBlocks_e2e(t *testing.T) { }() extLset := labels.FromStrings("prometheus", "prom-1") - shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, metricsBucket, func() labels.Labels { return extLset }, metadata.TestSource, false, false) + shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, metricsBucket, func() labels.Labels { return extLset }, metadata.TestSource, false) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -219,7 +219,7 @@ func TestShipper_SyncBlocksWithMigrating_e2e(t *testing.T) { defer upcancel2() testutil.Ok(t, p.WaitPrometheusUp(upctx2)) - shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, true, false) + shipper := NewWithCompacted(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, false) // Create 10 new blocks. 9 of them (non compacted) should be actually uploaded. var ( diff --git a/pkg/shipper/shipper_test.go b/pkg/shipper/shipper_test.go index 59c564df3c4..32cdeedf3b7 100644 --- a/pkg/shipper/shipper_test.go +++ b/pkg/shipper/shipper_test.go @@ -26,7 +26,7 @@ func TestShipperTimestamps(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - s := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + s := New(nil, nil, dir, nil, nil, metadata.TestSource, false) // Missing thanos meta file. 
_, _, err = s.Timestamps() @@ -123,7 +123,7 @@ func TestIterBlockMetas(t *testing.T) { }, })) - shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false) metas, err := shipper.blockMetasFromOldest() testutil.Ok(t, err) testutil.Equals(t, sort.SliceIsSorted(metas, func(i, j int) bool { @@ -162,7 +162,7 @@ func BenchmarkIterBlockMetas(b *testing.B) { }) b.ResetTimer() - shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false, false) + shipper := New(nil, nil, dir, nil, nil, metadata.TestSource, false) _, err = shipper.blockMetasFromOldest() testutil.Ok(b, err) diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index ef6a3068540..951b83f732f 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -19,6 +19,7 @@ import ( "sort" "strconv" "sync" + "sync/atomic" "testing" "time" @@ -51,7 +52,6 @@ import ( storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/pkg/testutil/e2eutil" - "go.uber.org/atomic" ) var emptyRelabelConfig = make([]*relabel.Config, 0) @@ -1281,10 +1281,10 @@ func benchBucketSeries(t testutil.TB, samplesPerSeries, totalSeries int, request if !t.IsBenchmark() { // Make sure the pool is correctly used. This is expected for 200k numbers. - testutil.Equals(t, numOfBlocks, int(chunkPool.(*mockedPool).gets.Load())) + testutil.Equals(t, numOfBlocks, int(chunkPool.(*mockedPool).gets)) // TODO(bwplotka): This is wrong negative for large number of samples (1mln). Investigate. - testutil.Equals(t, 0, int(chunkPool.(*mockedPool).balance.Load())) - chunkPool.(*mockedPool).gets.Store(0) + testutil.Equals(t, 0, int(chunkPool.(*mockedPool).balance)) + chunkPool.(*mockedPool).gets = 0 for _, b := range blocks { // NOTE(bwplotka): It is 4 x 1.0 for 100mln samples. Kind of make sense: long series. @@ -1306,8 +1306,8 @@ func (m fakePool) Put(_ *[]byte) {} type mockedPool struct { parent pool.BytesPool - balance atomic.Uint64 - gets atomic.Uint64 + balance uint64 + gets uint64 } func (m *mockedPool) Get(sz int) (*[]byte, error) { @@ -1315,13 +1315,13 @@ func (m *mockedPool) Get(sz int) (*[]byte, error) { if err != nil { return nil, err } - m.balance.Add(uint64(cap(*b))) - m.gets.Add(uint64(1)) + atomic.AddUint64(&m.balance, uint64(cap(*b))) + atomic.AddUint64(&m.gets, uint64(1)) return b, nil } func (m *mockedPool) Put(b *[]byte) { - m.balance.Sub(uint64(cap(*b))) + atomic.AddUint64(&m.balance, ^uint64(cap(*b)-1)) m.parent.Put(b) } diff --git a/pkg/store/limiter.go b/pkg/store/limiter.go index c60be901e92..1e354721c23 100644 --- a/pkg/store/limiter.go +++ b/pkg/store/limiter.go @@ -5,10 +5,10 @@ package store import ( "sync" + "sync/atomic" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "go.uber.org/atomic" ) type ChunksLimiter interface { @@ -25,7 +25,7 @@ type ChunksLimiterFactory func(failedCounter prometheus.Counter) ChunksLimiter // Limiter is a simple mechanism for checking if something has passed a certain threshold. type Limiter struct { limit uint64 - reserved atomic.Uint64 + reserved uint64 // Counter metric which we will increase if limit is exceeded. 
 	failedCounter prometheus.Counter
@@ -42,7 +42,7 @@ func (l *Limiter) Reserve(num uint64) error {
 	if l.limit == 0 {
 		return nil
 	}
-	if reserved := l.reserved.Add(num); reserved > l.limit {
+	if reserved := atomic.AddUint64(&l.reserved, num); reserved > l.limit {
 		// We need to protect from the counter being incremented twice due to concurrency
 		// while calling Reserve().
 		l.failedOnce.Do(l.failedCounter.Inc)
diff --git a/tutorials/katacoda/thanos/2-lts/courseBase.sh b/tutorials/katacoda/thanos/2-lts/courseBase.sh
index ae5f24a71e0..fb40f8c248a 100644
--- a/tutorials/katacoda/thanos/2-lts/courseBase.sh
+++ b/tutorials/katacoda/thanos/2-lts/courseBase.sh
@@ -2,5 +2,5 @@
 
 docker pull dockerenginesonia/thanosbench:v7
 docker pull quay.io/prometheus/prometheus:v2.19.0
-docker pull quay.io/thanos/thanos:v0.13.0
+docker pull quay.io/thanos/thanos:v0.15.0
 docker pull minio/minio:RELEASE.2019-01-31T00-31-19Z
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/finish.md b/tutorials/katacoda/thanos/2-lts/finish.md
index beac3eff966..eee4b96ffc7 100644
--- a/tutorials/katacoda/thanos/2-lts/finish.md
+++ b/tutorials/katacoda/thanos/2-lts/finish.md
@@ -1,3 +1,17 @@
 # Summary
 
-### Feedback
\ No newline at end of file
+Congratulations! 🎉🎉🎉
+You completed our second Thanos tutorial. Let's summarize what we learned:
+
+* To preserve data beyond Prometheus' regular retention time, we used an object storage system for backing up our historical data.
+* The Thanos Store component acts as a data retrieval proxy for the data inside our object storage.
+* Because the Sidecar uploads metric blocks to the object store as soon as they are written to disk, the "scraper" (Prometheus with Thanos Sidecar) stays lightweight. This simplifies maintenance, cost, and system design.
+* The Thanos Compactor improved query efficiency and also reduced the required storage size.
+
+See the next courses for other tutorials about different deployment models and more advanced features of Thanos!
+
+### Feedback
+
+Did you spot a bug or a typo in the tutorial, or do you have some feedback for us?
+
+Let us know on https://github.com/thanos-io/thanos or the #thanos Slack channel linked on https://thanos.io
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/index.json b/tutorials/katacoda/thanos/2-lts/index.json
index 8d7b8405fb0..be58ab264f8 100644
--- a/tutorials/katacoda/thanos/2-lts/index.json
+++ b/tutorials/katacoda/thanos/2-lts/index.json
@@ -18,12 +18,11 @@
     {
       "title": "Thanos Store Gateway",
       "text": "step3.md",
-      "verify": "step3-verify.sh"
+      "answer": "step3-answer.md"
     },
     {
       "title": "Thanos Compactor",
-      "text": "step4.md",
-      "verify": "step4-verify.sh"
+      "text": "step4.md"
     }
   ],
   "intro": {
diff --git a/tutorials/katacoda/thanos/2-lts/intro.md b/tutorials/katacoda/thanos/2-lts/intro.md
index 6519c8b95e6..d5617abe3b6 100644
--- a/tutorials/katacoda/thanos/2-lts/intro.md
+++ b/tutorials/katacoda/thanos/2-lts/intro.md
@@ -4,7 +4,11 @@ This course uses docker containers with pre-built docker images.
 
 In this tutorial, you will learn about :
 
-* Thanos Store Gateway : a metric browser that serves metric blocks stored in Object Store via *StoreAPI* gRPC API.
-* Unlimited metric retention for Prometheus.
+* How to start seamlessly uploading your Prometheus data to cheap object storage thanks to the Thanos sidecar.
+* How to then query that data in object storage thanks to the Thanos Store Gateway: a metric browser that serves metric blocks stored in the Object Store via the *StoreAPI* gRPC API.
+* How to query both fresh and older data in an easy way through Thanos Querier.
 
-Let's jump in!
\ No newline at end of file
+
+All of this allows you to keep your metrics in cheap and reliable object storage, allowing virtually unlimited metric retention for Prometheus.
+
+Let's jump in!
diff --git a/tutorials/katacoda/thanos/2-lts/query.png b/tutorials/katacoda/thanos/2-lts/query.png
new file mode 100644
index 00000000000..f847df45d6b
Binary files /dev/null and b/tutorials/katacoda/thanos/2-lts/query.png differ
diff --git a/tutorials/katacoda/thanos/2-lts/step1-verify.sh b/tutorials/katacoda/thanos/2-lts/step1-verify.sh
index e65bb534b8e..91cffc97c5d 100644
--- a/tutorials/katacoda/thanos/2-lts/step1-verify.sh
+++ b/tutorials/katacoda/thanos/2-lts/step1-verify.sh
@@ -4,4 +4,6 @@ curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1
 
 curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
+curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1
+
 echo "\"done\""
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step1.md b/tutorials/katacoda/thanos/2-lts/step1.md
index 31c2ebfb984..63581995cfe 100644
--- a/tutorials/katacoda/thanos/2-lts/step1.md
+++ b/tutorials/katacoda/thanos/2-lts/step1.md
@@ -4,26 +4,30 @@ Thanos is a set of components that adds high availability to Prometheus installa
 
 Thanos builds upon existing Prometheus instances which makes it seamlessly integrates into existing Prometheus setups.
 
-In this tutorial, we will be using one Prometheus Server.
+In this tutorial, we will mimic the usual situation of a Prometheus server that has been running for several months. We will deploy the Thanos component called `sidecar` next to Prometheus, use it to upload the old data to object storage, and then show how to query that data later on.
 
-Let's start this initial Prometheus setup, ready?
+This allows us to cost-effectively achieve unlimited retention for Prometheus.
+Let's start this initial Prometheus setup, ready?
 
 ## Generate Artificial Metric Data
 
-Execute the following command:
+Before starting Prometheus, let's generate some artificial data. You would like to learn about Thanos fast, so you probably don't have a month to wait for this tutorial until Prometheus collects a month of metrics, do you? (:
 
-```
-mkdir -p test
-```{{execute}}
+We will use our handy [thanosbench](link here) project to do so.
+
+So let's generate Prometheus blocks with just 4 series that span from a month ago until now!
+
+Execute the following command:
 
 ```
-docker run -i dockerenginesonia/thanosbench:v7 block plan -p realistic-key-k8s-1d-small --labels 'cluster="one"' --max-time 2019-10-18T00:00:00Z | docker run -v /root/test:/test -i dockerenginesonia/thanosbench:v7 block gen --output.dir test
+mkdir -p test && docker run -i dockerenginesonia/thanosbench:v7 block plan -p realistic-key-k8s-1d-small --labels 'cluster="one"' --max-time 2019-10-18T00:00:00Z | docker run -v /root/test:/test -i dockerenginesonia/thanosbench:v7 block gen --output.dir test
 ```{{execute}}
 
 ## Prometheus Configuration Files
 
-Here, we will prepare configuration files for the Prometheus instance.
+Here, we will prepare configuration files for the Prometheus instance that will run with our pre-generated data.
+It will also scrape the components we will use in this tutorial.
 
 Click `Copy To Editor` for config to propagate the configs to file.
@@ -48,12 +52,14 @@ scrape_configs:
       - targets: ['127.0.0.1:9000']
   - job_name: 'store_gateway'
     static_configs:
-      - targets: ['127.0.0.1:19090']
+      - targets: ['127.0.0.1:19095']
 
 ## Starting Prometheus Instances
 
-Let's now start the container representing Prometheus instance.
+Let's now start the container representing the Prometheus instance.
+
+Note the `-v $(pwd)/test:/prometheus \` mount and the `--storage.tsdb.path=/prometheus` flag, which allow us to place our generated data in the Prometheus data directory.
 
 Execute the following commands:
 
@@ -87,7 +93,7 @@ Once started you should be able to reach the Prometheus instance here:
 
 # Installing Thanos sidecar
 
-At the end of this step, we will have running Prometheus instance with sidecar deployed. You can read more about sidecar [here](https://thanos.io/components/sidecar.md/).
+At the end of this step, we will have a running Prometheus instance with the sidecar deployed. You can read more about the sidecar [here](https://thanos.io/tip/components/sidecar.md/).
 
 ## Deployment
 
@@ -102,7 +108,7 @@ docker run -d --net=host --rm \
     -v $(pwd)/test:/prometheus \
     --name prometheus-0-sidecar-eu1 \
     -u root \
-    quay.io/thanos/thanos:v0.13.0 \
+    quay.io/thanos/thanos:v0.15.0 \
     sidecar \
     --http-address 0.0.0.0:19090 \
     --grpc-address 0.0.0.0:19190 \
@@ -124,7 +130,7 @@ Tip: Look for `prometheus_tsdb_reloads_total` metric 🕵️
 
 ## Deploying Thanos Querier
 
-Let' now start the Query component. As you remember [Thanos sidecar](https://thanos.io/components/query.md/) exposes `StoreAPI`
+Let's now start the Query component. As you remember, [Thanos sidecar](https://thanos.io/tip/components/query.md/) exposes `StoreAPI`
 so we will make sure we point the Querier to the gRPC endpoints of the sidecar:
 
 Click below snippet to start the Querier.
@@ -132,12 +138,12 @@ Click below snippet to start the Querier.
 ```
 docker run -d --net=host --rm \
     --name querier \
-    quay.io/thanos/thanos:v0.13.0 \
+    quay.io/thanos/thanos:v0.15.0 \
     query \
     --http-address 0.0.0.0:29090 \
     --query.replica-label replica \
     --store 127.0.0.1:19190 \
-    --store 127.0.0.1:19191 && echo "Started Thanos Querier"
+    --store 127.0.0.1:10906 && echo "Started Thanos Querier"
 ```{{execute}}
 
 ## Setup verification
@@ -146,4 +152,4 @@ Thanos Querier exposes very similar UI to the Prometheus, but on top of many `St
 
 To check if the Querier works as intended let's look on [Querier UI `Store` page](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/stores).
 
-This should list the sidecar, including the external label.
\ No newline at end of file
+This should list the sidecar, including the external label.
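Besides the UI check described in this step, the same verification can be scripted against the Querier's Prometheus-compatible HTTP API. This is an illustrative Go sketch, not part of this patch; it assumes the Querier started above is reachable on 127.0.0.1:29090 (the `--http-address` used in the tutorial):

```go
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

func main() {
	// Instant query against the Querier's Prometheus-compatible API.
	// 29090 is the --http-address given to the Querier in this tutorial.
	params := url.Values{"query": []string{"up"}}
	resp, err := http.Get("http://127.0.0.1:29090/api/v1/query?" + params.Encode())
	if err != nil {
		fmt.Println("querier not reachable:", err)
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}
	// Expect a JSON body with "status":"success"; an empty result hints that
	// the sidecar's StoreAPI endpoint is not registered with the Querier.
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}
```

An empty result set for `up` would point at the same problem the `Store` page shows visually: the sidecar's gRPC endpoint is not wired into the Querier.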
diff --git a/tutorials/katacoda/thanos/2-lts/step2-verify.sh b/tutorials/katacoda/thanos/2-lts/step2-verify.sh
index b3c9d2b69e1..cb969231c82 100644
--- a/tutorials/katacoda/thanos/2-lts/step2-verify.sh
+++ b/tutorials/katacoda/thanos/2-lts/step2-verify.sh
@@ -4,8 +4,10 @@ curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1
 
 curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
+curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1
+
 curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1
 
-curl -s 127.0.0.1:9000/metrics > /dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
 echo "\"done\""
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step2.md b/tutorials/katacoda/thanos/2-lts/step2.md
index c1ce46cbd0c..75aff0ce7e9 100644
--- a/tutorials/katacoda/thanos/2-lts/step2.md
+++ b/tutorials/katacoda/thanos/2-lts/step2.md
@@ -7,11 +7,7 @@ In this step, we will configure the object store and change sidecar to upload to
 Now, execute the command
 
 ```
-mkdir -p /storage/thanos
-```{{execute}}
-
-```
-docker run -d --name minio -v /storage:/data -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" minio/minio:RELEASE.2019-01-31T00-31-19Z server /data
+mkdir -p /storage/thanos && docker run -d --name minio -v /storage:/data -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" minio/minio:RELEASE.2019-01-31T00-31-19Z server /data
 ```{{execute}}
 
 ## Verification
@@ -20,6 +16,11 @@ Now, you should have minio running well.
 
 To check if the Minio is working as intended, let's check out [here](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/)
 
+Enter the credentials as mentioned below:
+
+**Access Key** = `minio`
+**Secret Key** = `minio123`
+
 ## Configuration :
 
 The configuration file content :
@@ -51,7 +52,7 @@ docker run -d --net=host --rm \
     -v $(pwd)/test:/prometheus \
     --name sidecar \
     -u root \
-    quay.io/thanos/thanos:v0.13.0 \
+    quay.io/thanos/thanos:v0.15.0 \
     sidecar \
     --tsdb.path /prometheus \
     --objstore.config-file /etc/prometheus/bucket_storage.yml \
@@ -62,4 +63,6 @@
 
 The flag `--objstore.config-file` loads all the required configuration from the file to ship the TSDB blocks to an object storage bucket, the storage endpoints, and the credentials used.
 
-## Verification
\ No newline at end of file
+## Verification
+
+We can check whether the data has been uploaded into the `thanos` bucket by visiting [Minio](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/). The stored metrics will also be available in the object storage.
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step3-answer.md b/tutorials/katacoda/thanos/2-lts/step3-answer.md
new file mode 100644
index 00000000000..06e48a1d847
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step3-answer.md
@@ -0,0 +1,19 @@
+## Answer
+
+**In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?**
+
+This is handled by having unique **external labels** for all Prometheus instances, their sidecars, and HA replicas. To indicate that all replicas are storing the same targets, they differ in only one label.
+
+For instance, consider the situation below:
+
+```
+First:
+"cluster": "prod1"
+"replica": "0"
+
+Second:
+"cluster": "prod1"
+"replica": "1"
+```
+
+There is no problem with storing them since the label sets are **unique**.
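To make the label-set reasoning above concrete, here is a small illustrative Go sketch — not part of this patch — using the `github.com/prometheus/prometheus/pkg/labels` package that the code in this diff already imports; the `cluster`/`replica` values are the ones from the example:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/labels"
)

func main() {
	// External label sets of the two HA replicas from the example above.
	first := labels.FromStrings("cluster", "prod1", "replica", "0")
	second := labels.FromStrings("cluster", "prod1", "replica", "1")

	// The full label sets are unique, so each sidecar uploads its blocks
	// under a distinct identity and they never overwrite each other.
	fmt.Println(labels.Equal(first, second)) // false

	// Dropping the replica label makes the sets identical again, which is
	// what the Querier relies on when deduplicating with --query.replica-label.
	a := labels.NewBuilder(first).Del("replica").Labels()
	b := labels.NewBuilder(second).Del("replica").Labels()
	fmt.Println(labels.Equal(a, b)) // true
}
```

Because only the `replica` label differs, the uploaded blocks never collide, while stripping that label at query time lets the replicas be deduplicated.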
\ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3-verify.sh b/tutorials/katacoda/thanos/2-lts/step3-verify.sh index b3c9d2b69e1..979b3df4206 100644 --- a/tutorials/katacoda/thanos/2-lts/step3-verify.sh +++ b/tutorials/katacoda/thanos/2-lts/step3-verify.sh @@ -4,8 +4,12 @@ curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 +curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1 + curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1 -curl -s 127.0.0.1:9000/metrics > /dev/null || exit 1 +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1 echo "\"done\"" \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3.md b/tutorials/katacoda/thanos/2-lts/step3.md index 03b2568a5d6..05e51e6b392 100644 --- a/tutorials/katacoda/thanos/2-lts/step3.md +++ b/tutorials/katacoda/thanos/2-lts/step3.md @@ -8,7 +8,7 @@ Thanos is a single Go binary capable to run in different modes. Each mode repres Let's take a look at all the Thanos commands: -```docker run --rm quay.io/thanos/thanos:v0.12.2 --help```{{execute}} +```docker run --rm quay.io/thanos/thanos:v0.15.0 --help```{{execute}} You should see multiple commands that solve different purposes, block storage based long-term storage for Prometheus. @@ -27,11 +27,11 @@ In this step we will focus on thanos `store gateway`: This data is generally safe to delete across restarts at the cost of increased startup times. -You can read more about [Store](https://thanos.io/components/store.md/) here. +You can read more about [Store](https://thanos.io/tip/components/store.md/) here. ## Deployment -Click snippets to deploy thanos store to each Prometheus instance. +Click on the snippet to deploy thanos store to the running Prometheus instance. ### Deploying store to "EU1" Prometheus @@ -40,20 +40,34 @@ docker run -d --net=host --rm \ -v $(pwd)/bucket_storage.yml:/etc/prometheus/bucket_storage.yml \ -v $(pwd)/test:/prometheus \ --name thanos-store \ - quay.io/thanos/thanos:v0.12.2 \ + quay.io/thanos/thanos:v0.15.0 \ store \ --data-dir /prometheus \ --objstore.config-file /etc/prometheus/bucket_storage.yml \ - --http-address 0.0.0.0:19190 \ - --grpc-address 0.0.0.0:19091 + --http-address 0.0.0.0:10905 \ + --grpc-address 0.0.0.0:10906 && echo "Thanos Store added" ```{{execute}} +## How to query Thanos store data? + +In this step, we will see how we can query Thanos store data which has access to historical data from the `thanos` bucket, and let's play with this setup a bit. + +Click on the [Querier UI `Graph` page](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/graph) and try querying data for a year or two by inserting metrics [k8s_app_metric0](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/graph?g0.expr=k8s_app_metric0&g0.tab=1). Make sure `deduplication` is selected and you will be able to discover all the data fetched by Thanos store. + +![](https://github.com/soniasingla/thanos/raw/master/tutorials/katacoda/thanos/2-lts/query.png) + +Also, you can check all the active endpoints located by thanos-store by clicking on [Stores](https://[[HOST_SUBDOMAIN]]-29090-[[KATACODA_HOST]].environments.katacoda.com/stores). + +We've added Thanos Query, a web and API frontend that can query a Prometheus instance and Thanos Store at the same time, which gives transparent access to the archived blocks and real-time metrics. 
The vanilla PromQL Prometheus engine used for evaluating the query deduces what time series and for what time ranges we need to fetch the data. Also, StoreAPIs propagate external labels and the time range they have data for, so we can do basic filtering on this. However, if you don't specify any of these in the query (only "up" series) the querier concurrently asks all the StoreAPI servers. It might cause a duplication of results between sidecar and store data.
+
 ## Question Time? 🤔
 
 In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?
 
 Think over this 😉
 
+To see the answer to this question click SHOW SOLUTION below.
+
 ## Next
 
 Voila! In the next step, we will talk about Thanos Compactor, it's retention capabilities, and how it improves query efficiency and reduce the required storage size.
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step4-verify.sh b/tutorials/katacoda/thanos/2-lts/step4-verify.sh
index 4a5e523a010..f5ee7ba4fa6 100644
--- a/tutorials/katacoda/thanos/2-lts/step4-verify.sh
+++ b/tutorials/katacoda/thanos/2-lts/step4-verify.sh
@@ -4,10 +4,14 @@ curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1
 
 curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
+curl -s 127.0.0.1:29090/metrics > /dev/null || exit 1
+
 curl -s 127.0.0.1:9090/metrics > /dev/null || exit 1
 
-curl -s 127.0.0.1:9000/metrics > /dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
-curl -s 127.0.0.1:19191/metrics > /dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
+
+curl -s 127.0.0.1:19090/metrics > /dev/null || exit 1
 
 echo "\"done\""
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step4.md b/tutorials/katacoda/thanos/2-lts/step4.md
index 9a464428679..8c437b86c11 100644
--- a/tutorials/katacoda/thanos/2-lts/step4.md
+++ b/tutorials/katacoda/thanos/2-lts/step4.md
@@ -10,7 +10,7 @@ The `Compactor` is an essential component that operates on a single object stora
 
 It is also responsible for downsampling of data, performing 5m downsampling after 40 hours, and 1h downsampling after 10 days.
 
-If you want to know more about Thanos Compactor, jump [here](https://thanos.io/components/compact.md/).
+If you want to know more about Thanos Compactor, jump [here](https://thanos.io/tip/components/compact.md/).
 
 **Note**: Thanos Compactor is mandatory if you use object storage otherwise Thanos Store Gateway will be too slow without using a compactor.
 
@@ -20,12 +20,13 @@ Click below snippet to start the Compactor.
 
 ```
 docker run -d --net=host --rm \
+    -v $(pwd)/bucket_storage.yml:/etc/prometheus/bucket_storage.yml \
     --name thanos compact \
-    quay.io/thanos/thanos:v0.12.2 \
+    quay.io/thanos/thanos:v0.15.0 \
     compact \
-    --data-dir /var/thanos/compact \
-    --objstore.config-file bucket_storage.yml \
-    --http-address 0.0.0.0:19191
+    --data-dir /prometheus \
+    --objstore.config-file /etc/prometheus/bucket_storage.yml \
+    --http-address 0.0.0.0:19092
 ```{{execute}}
 
 ## Unlimited Retention - Not Challenging anymore?