diff --git a/tutorials/katacoda/thanos-pathway.json b/tutorials/katacoda/thanos-pathway.json
index 02f647ae89..5b4b1ab3df 100644
--- a/tutorials/katacoda/thanos-pathway.json
+++ b/tutorials/katacoda/thanos-pathway.json
@@ -11,7 +11,7 @@
   {
     "course_id": "2-lts",
     "title": "Intro: Downsampling and unlimited metric retention for Prometheus",
-    "description": "In progress. Stay Tuned!"
+    "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos."
   },
   {
     "course_id": "3-meta-monitoring",
diff --git a/tutorials/katacoda/thanos/2-lts/courseBase.sh b/tutorials/katacoda/thanos/2-lts/courseBase.sh
index f1f641af19..cc628b61a3 100644
--- a/tutorials/katacoda/thanos/2-lts/courseBase.sh
+++ b/tutorials/katacoda/thanos/2-lts/courseBase.sh
@@ -1 +1,8 @@
 #!/usr/bin/env bash
+
+docker pull minio/minio:RELEASE.2019-01-31T00-31-19Z
+docker pull quay.io/prometheus/prometheus:v2.20.0
+docker pull quay.io/thanos/thanos:v0.16.0
+docker pull quay.io/thanos/thanosbench:v0.2.0-rc.1
+
+mkdir /root/editor
diff --git a/tutorials/katacoda/thanos/2-lts/finish.md b/tutorials/katacoda/thanos/2-lts/finish.md
new file mode 100644
index 0000000000..eee4b96ffc
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/finish.md
@@ -0,0 +1,17 @@
+# Summary
+
+Congratulations! πŸŽ‰πŸŽ‰πŸŽ‰
+You completed our second Thanos tutorial. Let's summarize what we learned:
+
+* To preserve data beyond Prometheus' regular retention time, we used an object storage system to back up our historical data.
+* The Thanos Store component acts as a data retrieval proxy for the data inside our object storage.
+* Because the Sidecar uploads metric blocks to object storage as soon as they are written to disk, it keeps the "scraper" (Prometheus with Thanos Sidecar) lightweight. This simplifies maintenance, cost, and system design.
+* The Thanos Compactor improved query efficiency and also reduced the required storage size.
+
+See the next courses for tutorials about other deployment models and more advanced features of Thanos!
+
+### Feedback
+
+Did you spot a bug or a typo in the tutorial, or do you have some feedback for us?
+
+Let us know on https://github.com/thanos-io/thanos or on the #thanos Slack channel linked at https://thanos.io
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/index.json b/tutorials/katacoda/thanos/2-lts/index.json
index 35d2f8f836..fc74b70d58 100644
--- a/tutorials/katacoda/thanos/2-lts/index.json
+++ b/tutorials/katacoda/thanos/2-lts/index.json
@@ -1,17 +1,53 @@
 {
   "title": "Intro: Downsampling and unlimited metric retention for Prometheus",
-  "description": "Learn how to extend you metric retention in a cheap way with Thanos.",
+  "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos.",
+  "difficulty": "Moderate",
   "details": {
     "steps": [
+      {
+        "title": "Configuring Initial Prometheus Server",
+        "text": "step1.md",
+        "verify": "step1-verify.sh"
+      },
+      {
+        "title": "Thanos Sidecars",
+        "text": "step2.md",
+        "verify": "step2-verify.sh"
+      },
+      {
+        "title": "Thanos Store Gateway",
+        "text": "step3.md",
+        "answer": "step3-answer.md"
+      },
+      {
+        "title": "Thanos Compactor",
+        "text": "step4.md"
+      }
     ],
     "intro": {
       "text": "intro.md",
-      "courseData": "courseBase.sh"
+      "courseData": "courseBase.sh",
+      "credits": "https://thanos.io"
+    },
+    "files": {
+      "text": "finish.md",
+      "credits": "test"
     }
   },
+  "files": [
+    "prometheus0_eu1.yml",
+    "bucket_storage.yaml"
+  ],
   "environment": {
     "uilayout": "editor-terminal",
-    "uisettings": "yaml"
+    "uisettings": "yaml",
+    "uieditorpath": "/root/editor",
+    "showdashboard": true,
+    "dashboards": [
+      {"name": "Prometheus 0 EU1", "port": 9090},
+      {"name": "Minio", "port": 9000},
+      {"name": "Thanos Query", "port": 9091}
+    ]
   },
   "backend": {
     "imageid": "docker-direct"
diff --git a/tutorials/katacoda/thanos/2-lts/intro.md b/tutorials/katacoda/thanos/2-lts/intro.md
index 26a56510f0..eeb8e58273 100644
--- a/tutorials/katacoda/thanos/2-lts/intro.md
+++ b/tutorials/katacoda/thanos/2-lts/intro.md
@@ -1,7 +1,30 @@
-# In progress
+# Intro: Downsampling and unlimited metric retention for Prometheus
 
-🚧 This tutorial is in progress. 🚧
+They say that [Thanos](https://thanos.io) is a set of components that can be composed into a highly available metric system with **unlimited storage capacity**,
+and that it can be added **seamlessly** on top of existing Prometheus deployments. πŸ€”πŸ€”
 
-Do you want to see this soon or you want to help us?
+In this course you can experience all of this yourself.
 
-Let us know on https://github.com/thanos-io/thanos or #thanos slack channel linked on https://thanos.io
\ No newline at end of file
+In this tutorial, you will learn about:
+
+* How to start uploading your Prometheus data seamlessly to cheap object storage thanks to the Thanos Sidecar.
+* How to then query your data in object storage thanks to the Thanos Store Gateway.
+* How to query both fresh and older data in an easy way through the Thanos Querier.
+
+All of this allows you to keep your metrics in cheap and reliable object storage, giving you virtually unlimited metric retention for Prometheus.
+
+> NOTE: This course uses Docker containers with pre-built Thanos, Prometheus, and Minio Docker images available publicly.
+> However, a similar scenario will work with any other deployment method like Kubernetes or systemd, etc.
+
+### Prerequisites
+
+Please complete the first intro course about Global View before jumping into this one! πŸ€—
+
+### Feedback
+
+Did you spot a bug or a typo in the tutorial, or do you have some feedback for us?
+
+Let us know on https://github.com/thanos-io/thanos or on the #thanos Slack channel linked at https://thanos.io
+
+### Contributed by:
+
+* Sonia Singla [@soniasingla](http://github.com/soniasingla)
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/query.png b/tutorials/katacoda/thanos/2-lts/query.png
new file mode 100644
index 0000000000..5561402330
Binary files /dev/null and b/tutorials/katacoda/thanos/2-lts/query.png differ
diff --git a/tutorials/katacoda/thanos/2-lts/step1-verify.sh b/tutorials/katacoda/thanos/2-lts/step1-verify.sh
new file mode 100644
index 0000000000..7e9a0cd6c2
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step1-verify.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1
+
+echo '"done"'
diff --git a/tutorials/katacoda/thanos/2-lts/step1.md b/tutorials/katacoda/thanos/2-lts/step1.md
new file mode 100644
index 0000000000..d3649325c7
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step1.md
@@ -0,0 +1,145 @@
+# Step 1 - Initial Prometheus Setup
+
+In this tutorial, we will mimic the usual situation of a Prometheus server that has been running for... a year!
+We will use it to seamlessly back up all the old data to object storage and configure Prometheus for continuous backup mode, which
+will allow us to cost-effectively achieve unlimited retention for Prometheus.
+
+Last but not least, we will go through setting it all up for querying and automated maintenance (e.g. compactions, retention and downsampling).
+
+In order to showcase all of this, let's start with the single-cluster setup from the previous course. Ready for the initial Prometheus setup?
+
+## Generate Artificial Metrics for 1 year
+
+Actually, before starting Prometheus, let's generate some **artificial data**. You most likely want to learn about Thanos fast,
+so you probably don't have months to wait for Prometheus to collect that much data, do you? (:
+
+We will use our handy [thanosbench](https://github.com/thanos-io/thanosbench) project to do so! Let's generate Prometheus
+data (in the form of TSDB blocks) with just 5 series (gauges) that span from a year ago until now (-6h)!
+
+Execute the following command (it should take a few seconds):
+
+```
+mkdir -p /root/prom-eu1 && docker run -i quay.io/thanos/thanosbench:v0.2.0-rc.1 block plan -p continuous-365d-tiny --labels 'cluster="eu1"' --max-time=6h | docker run -v /root/prom-eu1:/prom-eu1 -i quay.io/thanos/thanosbench:v0.2.0-rc.1 block gen --output.dir prom-eu1
+```{{execute}}
+
+On successful block creation you should see the following log lines:
+
+```
+level=info ts=2020-10-20T18:28:42.625041939Z caller=block.go:87 msg="all blocks done" count=X
+level=info ts=2020-10-20T18:28:42.625100758Z caller=main.go:118 msg=exiting cmd="block gen"
+```
+
+Run `ls -lR /root/prom-eu1` to see dozens of generated TSDB blocks.
+
+## Prometheus Configuration File
+
+Here, we will prepare a configuration file for the Prometheus instance that will run with our pre-generated data.
+It will also scrape the components we will use in this tutorial.
+
+Click `Copy To Editor` to propagate the config to the `prometheus0_eu1.yml` file.
+
+global:
+  scrape_interval: 5s
+  external_labels:
+    cluster: eu1
+    replica: 0
+    tenant: team-eu # Not needed, but a good practice if you want to grow this to a multi-tenant system some day.
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['127.0.0.1:9090']
+  - job_name: 'sidecar'
+    static_configs:
+      - targets: ['127.0.0.1:19090']
+  - job_name: 'minio'
+    metrics_path: /minio/prometheus/metrics
+    static_configs:
+      - targets: ['127.0.0.1:9000']
+  - job_name: 'querier'
+    static_configs:
+      - targets: ['127.0.0.1:9091']
+  - job_name: 'store_gateway'
+    static_configs:
+      - targets: ['127.0.0.1:19092']
+
+
+## Starting Prometheus Instance
+
+Let's now start the container representing our Prometheus instance.
+
+Note the `-v /root/prom-eu1:/prometheus \` mount and the `--storage.tsdb.path=/prometheus` flag, which let us place our generated data
+in the Prometheus data directory.
+
+Let's deploy Prometheus now. Note that we disabled local Prometheus compactions with the `--storage.tsdb.max-block-duration` and `--storage.tsdb.min-block-duration` flags.
+Currently, this is important for the basic object storage backup scenario to avoid conflicts between bucket and local compactions.
+Read more [here](https://thanos.io/tip/components/sidecar.md/#sidecar).
+
+We also extend the Prometheus retention with `--storage.tsdb.retention.time=1000d`. This is because Prometheus by default removes all data older
+than 2 weeks, and we have a year of it (:
+
+### Deploying "EU1"
+
+```
+docker run -d --net=host --rm \
+    -v /root/editor/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \
+    -v /root/prom-eu1:/prometheus \
+    -u root \
+    --name prometheus-0-eu1 \
+    quay.io/prometheus/prometheus:v2.20.0 \
+    --config.file=/etc/prometheus/prometheus.yml \
+    --storage.tsdb.retention.time=1000d \
+    --storage.tsdb.path=/prometheus \
+    --storage.tsdb.max-block-duration=2h \
+    --storage.tsdb.min-block-duration=2h \
+    --web.listen-address=:9090 \
+    --web.external-url=https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com \
+    --web.enable-lifecycle \
+    --web.enable-admin-api
+```{{execute}}
+
+## Setup Verification
+
+Once started, you should be able to reach the Prometheus instance here and query... 1 year of data!
+
+* [Prometheus-0 EU1](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/)
+
+## Thanos Sidecar & Querier
+
+Similar to the previous course, let's set up global-view querying with a sidecar:
+
+```
+docker run -d --net=host --rm \
+    --name prometheus-0-eu1-sidecar \
+    -u root \
+    quay.io/thanos/thanos:v0.16.0 \
+    sidecar \
+    --http-address 0.0.0.0:19090 \
+    --grpc-address 0.0.0.0:19190 \
+    --prometheus.url http://127.0.0.1:9090
+```{{execute}}
+
+And the Querier. As you remember, the [Thanos sidecar](https://thanos.io/tip/components/sidecar.md/) exposes the `StoreAPI`,
+so we will make sure we point the Querier to the gRPC endpoint of the sidecar:
+
+```
+docker run -d --net=host --rm \
+    --name querier \
+    quay.io/thanos/thanos:v0.16.0 \
+    query \
+    --http-address 0.0.0.0:9091 \
+    --query.replica-label replica \
+    --store 127.0.0.1:19190
+```{{execute}}
+
+## Setup verification
+
+Similar to the previous course, let's check if the Querier works as intended. Let's look at the
+[Querier UI `Store` page](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/stores).
+
+This should list the sidecar, including its external labels.
+
+On the Graph page you should also see our 5 series over a 1-year time range, thanks to Prometheus and the sidecar's StoreAPI: [Graph](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com).
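+
+If you prefer the terminal, you can sanity-check the same thing through the Querier's Prometheus-compatible HTTP API. The snippet below is only a quick sketch: it assumes the Querier from above is listening on `127.0.0.1:9091`, that the generated series include the `continuous_app_metric0` metric we also use later in this course, and GNU `date` for the timestamp.
+
+```
+# How many series does the Querier see for one of the generated metrics?
+curl -s 'http://127.0.0.1:9091/api/v1/query?query=count(continuous_app_metric0)'
+
+# Ask for a value from roughly 11 months ago to confirm the old blocks are queryable.
+curl -sG 'http://127.0.0.1:9091/api/v1/query' \
+  --data-urlencode 'query=continuous_app_metric0' \
+  --data-urlencode "time=$(date -d '11 months ago' +%s)"
+```{{execute}}
+
+Both calls should return JSON with `"status":"success"` and a non-empty `result` list.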
+
+Click `Continue` to see how we can move this data to much cheaper and easier-to-operate object storage.
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step2-verify.sh b/tutorials/katacoda/thanos/2-lts/step2-verify.sh
new file mode 100644
index 0000000000..68bdd69733
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step2-verify.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+
+echo '"done"'
diff --git a/tutorials/katacoda/thanos/2-lts/step2.md b/tutorials/katacoda/thanos/2-lts/step2.md
new file mode 100644
index 0000000000..81a399a5e0
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step2.md
@@ -0,0 +1,97 @@
+# Step 2 - Object Storage Continuous Backup
+
+Maintaining one year of data within your Prometheus is doable, but not easy. It's tricky to
+resize, back up or maintain this data long term. On top of that, Prometheus does not do any replication,
+so any unavailability of Prometheus results in query unavailability.
+
+This is where Thanos comes into play. With a single configuration change we can allow the Thanos Sidecar to continuously upload the blocks of metrics
+that are periodically persisted to disk by Prometheus.
+
+> NOTE: When scraping data, Prometheus initially aggregates all samples in memory and in the WAL (the on-disk write-ahead log). Only after 2-3h does it "compact"
+> the data onto disk in the form of a 2h TSDB block. This is why we still need to query Prometheus for the latest data, but overall with this change
+> we can keep the Prometheus retention to a minimum. It's recommended to keep the Prometheus retention at least 6 hours long in this case, to have a safe buffer
+> for a potential network partition.
+
+## Starting Object Storage: Minio
+
+Let's start a simple S3-compatible Minio engine that keeps data on the local disk:
+
+```
+mkdir /root/minio && \
+docker run -d --rm --name minio \
+     -v /root/minio:/data \
+     -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=melovethanos" \
+     minio/minio:RELEASE.2019-01-31T00-31-19Z \
+     server /data
+```{{execute}}
+
+Create the `thanos` bucket:
+
+```
+mkdir /root/minio/thanos
+```{{execute}}
+
+## Verification
+
+To check that Minio is working as intended, let's [open the Minio server UI](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/)
+
+Enter the credentials mentioned below:
+
+**Access Key** = `minio`
+**Secret Key** = `melovethanos`
+
+## Sidecar block backup
+
+All Thanos components that use object storage use the same `objstore.config` flag with the same "little" bucket config format.
+
+Click `Copy To Editor` to propagate the config to the `bucket_storage.yaml` file:
+
+type: S3
+config:
+  bucket: "thanos"
+  endpoint: "127.0.0.1:9000"
+  insecure: true
+  signature_version2: true
+  access_key: "minio"
+  secret_key: "melovethanos"
+
+
+Let's restart the sidecar with the updated configuration, this time in backup mode.
+
+```
+docker stop prometheus-0-eu1-sidecar
+```{{execute}}
+
+The [Thanos sidecar](https://thanos.io/tip/components/sidecar.md/) allows us to back up all the blocks that Prometheus persists to
+disk. In order to accomplish this we need to make sure that:
+
+* The sidecar has direct access to the Prometheus data directory (in our case the host's `/root/prom-eu1` dir) via the `--tsdb.path` flag.
+* The bucket configuration is specified via `--objstore.config-file`.
+* `--shipper.upload-compacted` is set if you want to upload already compacted blocks when the sidecar starts. Use this only
+when you want to upload blocks that have never been seen before, e.g. on a new Prometheus introduced to the Thanos system.
+
+Let's run the sidecar:
+
+```
+docker run -d --net=host --rm \
+    -v /root/editor/bucket_storage.yaml:/etc/thanos/minio-bucket.yaml \
+    -v /root/prom-eu1:/prometheus \
+    --name prometheus-0-eu1-sidecar \
+    -u root \
+    quay.io/thanos/thanos:v0.16.0 \
+    sidecar \
+    --tsdb.path /prometheus \
+    --objstore.config-file /etc/thanos/minio-bucket.yaml \
+    --shipper.upload-compacted \
+    --http-address 0.0.0.0:19090 \
+    --grpc-address 0.0.0.0:19190 \
+    --prometheus.url http://127.0.0.1:9090
+```{{execute}}
+
+## Verification
+
+We can check whether the data is uploaded into the `thanos` bucket by visiting [Minio](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/).
+It will take a couple of seconds to synchronize all blocks.
+
+Once all blocks appear in the Minio `thanos` bucket, we are sure our data is backed up. Awesome! πŸ’ͺ
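+
+If you prefer to double-check from the terminal, the Thanos image also ships bucket tooling that can list what landed in object storage. Treat this as a sketch: it assumes the `tools bucket ls` sub-command behaves this way in the v0.16.0 image and reuses the `bucket_storage.yaml` config from above.
+
+```
+docker run --rm --net=host \
+    -v /root/editor/bucket_storage.yaml:/etc/thanos/minio-bucket.yaml \
+    quay.io/thanos/thanos:v0.16.0 \
+    tools bucket ls \
+    --objstore.config-file /etc/thanos/minio-bucket.yaml
+```{{execute}}
+
+Each line of output should be the ULID of one uploaded block, matching the block directories you saw under `/root/prom-eu1`.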
diff --git a/tutorials/katacoda/thanos/2-lts/step3-answer.md b/tutorials/katacoda/thanos/2-lts/step3-answer.md
new file mode 100644
index 0000000000..40ab7c2f64
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step3-answer.md
@@ -0,0 +1,19 @@
+## Answer
+
+**In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?**
+
+This is handled by having unique **external labels** for all Prometheus instances, sidecars and HA replicas. To indicate that all replicas are storing the same targets, they differ in only one label.
+
+For instance, consider the situation below:
+
+```
+First:
+"cluster": "prod1"
+"replica": "0"
+
+Second:
+"cluster": "prod1"
+"replica": "1"
+```
+
+There is no problem with storing them since the label sets are **unique**.
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step3-verify.sh b/tutorials/katacoda/thanos/2-lts/step3-verify.sh
new file mode 100644
index 0000000000..1bdfd136b7
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step3-verify.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19091/metrics >/dev/null || exit 1
+
+echo '"done"'
diff --git a/tutorials/katacoda/thanos/2-lts/step3.md b/tutorials/katacoda/thanos/2-lts/step3.md
new file mode 100644
index 0000000000..c04446a9c9
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step3.md
@@ -0,0 +1,68 @@
+# Step 3 - Fetching metrics from Bucket
+
+In this step, we will learn about the Thanos Store Gateway and how to deploy it.
+
+## Thanos Components
+
+Let's take a look at all the Thanos commands:
+
+```docker run --rm quay.io/thanos/thanos:v0.16.0 --help```{{execute}}
+
+You should see multiple commands that serve different purposes, all building blocks of block-storage-based long-term storage for Prometheus.
+
+In this step we will focus on the Thanos `store gateway`:
+
+```
+  store [<flags>]
+    Store node giving access to blocks in a bucket provider
+```
+
+## Store Gateway
+
+* This component implements the Store API on top of historical data in an object storage bucket. It acts primarily as an API gateway and therefore does not need
+significant amounts of local disk space.
+* It keeps a small amount of information about all remote blocks on the local disk and keeps it in sync with the bucket.
+This data is generally safe to delete across restarts at the cost of increased startup times.
+
+You can read more about the [Store Gateway](https://thanos.io/tip/components/store.md/) here.
+
+### Deploying store for "EU1" Prometheus data
+
+```
+docker run -d --net=host --rm \
+    -v /root/editor/bucket_storage.yaml:/etc/thanos/minio-bucket.yaml \
+    --name store-gateway \
+    quay.io/thanos/thanos:v0.16.0 \
+    store \
+    --objstore.config-file /etc/thanos/minio-bucket.yaml \
+    --http-address 0.0.0.0:19091 \
+    --grpc-address 0.0.0.0:19191
+```{{execute}}
+
+## How to query Thanos store data?
+
+In this step, we will see how we can query the Thanos store data, which has access to the historical data in the `thanos` bucket, and play with this setup a bit.
+
+Click on the [Querier UI `Graph` page](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/graph) and try querying data for a year or two by inserting the metric [continuous_app_metric0](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/graph?g0.range_input=1y&g0.max_source_resolution=0s&g0.expr=continuous_app_metric0&g0.tab=0). Make sure `deduplication` is selected, and you will be able to discover all the data fetched by the Thanos store.
+
+![](https://github.com/soniasingla/thanos/raw/master/tutorials/katacoda/thanos/2-lts/query.png)
+
+Also, you can check all the active StoreAPI endpoints known to the Querier by clicking on [Stores](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/stores).
+
+We've added Thanos Query, a web and API frontend that can query a Prometheus instance and a Thanos Store at the same time, which gives transparent access to the archived blocks and real-time metrics. The vanilla PromQL Prometheus engine used for evaluating the query deduces what time series and what time ranges we need to fetch the data for. Also, StoreAPIs propagate the external labels and the time range they have data for, so we can do basic filtering on this. However, if you don't specify any of these in the query (e.g. only an `up` series), the Querier concurrently asks all the StoreAPI servers. This might cause duplication of results between the sidecar and the store data.
+
+Now, another interesting question: how do we make sure we query the data from the bucket only?
+
+We can check this by visiting the [New UI](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/new/graph?g0.expr=&g0.tab=0&g0.stacked=0&g0.range_input=1h&g0.max_source_resolution=0s&g0.deduplicate=1&g0.partial_response=0&g0.store_matches=[]), inserting the `continuous_app_metric0` metric again with a 1-year graph time range, and clicking on `Enable Store Filtering`. This allows us to filter stores and helps us debug exactly where we are querying the data from.
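+
+Before moving on, you can also confirm from the terminal that the Store Gateway itself is ready and has synced block metadata from the bucket. Treat this as a sketch: the probe endpoint and metric name below are our assumption for this Thanos version, so adjust them if your build differs.
+
+```
+# Readiness probe on the Store Gateway's HTTP port.
+curl -s 127.0.0.1:19091/-/ready
+
+# Rough view of how many block metas the Store Gateway has synced from the bucket.
+curl -s 127.0.0.1:19091/metrics | grep thanos_blocks_meta_synced | head
+```{{execute}}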
+
+## Question Time? πŸ€”
+
+In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?
+
+Think it over πŸ˜‰
+
+To see the answer to this question, click `SHOW SOLUTION` below.
+
+## Next
+
+Voila! In the next step, we will talk about the Thanos Compactor, its retention capabilities, and how it improves query efficiency and reduces the required storage size.
\ No newline at end of file
diff --git a/tutorials/katacoda/thanos/2-lts/step4-verify.sh b/tutorials/katacoda/thanos/2-lts/step4-verify.sh
new file mode 100644
index 0000000000..b980b137de
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step4-verify.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19091/metrics >/dev/null || exit 1
+
+curl -s 127.0.0.1:19095/metrics >/dev/null || exit 1
+
+echo '"done"'
diff --git a/tutorials/katacoda/thanos/2-lts/step4.md b/tutorials/katacoda/thanos/2-lts/step4.md
new file mode 100644
index 0000000000..c0b8c60cbe
--- /dev/null
+++ b/tutorials/katacoda/thanos/2-lts/step4.md
@@ -0,0 +1,57 @@
+# Step 4 - Thanos Compactor
+
+In this step, we will install the Thanos Compactor, which applies the compaction procedure of the Prometheus 2.0 storage engine to block data in object storage.
+
+Before moving forward, let's take a closer look at what the `Compactor` component does:
+
+## Compactor
+
+The `Compactor` is an essential component that operates on a single object storage bucket to compact, downsample and apply retention to the TSDB blocks held inside, thus making queries on historical data more efficient. It creates aggregates of old metrics (based upon the rules).
+
+It is also responsible for downsampling the data: it performs 5m downsampling after 40 hours and 1h downsampling after 10 days.
+
+If you want to know more about the Thanos Compactor, jump [here](https://thanos.io/tip/components/compact.md/).
+
+**Note**: The Thanos Compactor is mandatory if you use object storage; without it, the Thanos Store Gateway will be too slow.
+
+## Deploying Thanos Compactor
+
+Click the snippet below to start the Compactor.
+
+```
+docker run -d --net=host --rm \
+    -v /root/editor/bucket_storage.yaml:/etc/thanos/minio-bucket.yaml \
+    --name thanos-compact \
+    quay.io/thanos/thanos:v0.16.0 \
+    compact \
+    --wait --wait-interval 30s \
+    --consistency-delay 0s \
+    --objstore.config-file /etc/thanos/minio-bucket.yaml \
+    --http-address 0.0.0.0:19095
+```{{execute}}
+
+The `--wait` flag is used to make sure all compactions have been processed, while `--wait-interval` is kept at 30s so that all the compactions and downsampling happen quickly; note that `--wait-interval` only works when the `--wait` flag is specified. Another flag, `--consistency-delay`, is used for buckets that are not strongly consistent. It is the minimum age of non-compacted blocks before they are processed. Here, we keep the delay at 0s, assuming the bucket is consistent.
+
+## Setup Verification
+
+To check that the Compactor works fine, we can look at the [Bucket View](https://[[HOST_SUBDOMAIN]]-19095-[[KATACODA_HOST]].environments.katacoda.com/loaded).
+
+Now, if we click on a block, it will show us all of its metadata (series, samples, resolution, chunks, and many more things).
+
+## Compaction and Downsampling
+
+When we query large amounts of historical data, queries get slower and slower as we retrieve a year's worth of raw samples. Thus, Thanos uses a technique called downsampling (a process of reducing the sampling rate of the signal) to keep the queries responsive, and no special configuration is required to perform this process. The Compactor applies compaction to the bucket data and also handles the downsampling of historical data.
+
+Now, click on the [Querier](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/new/graph?g0.expr=&g0.tab=0&g0.stacked=0&g0.range_input=1h&g0.max_source_resolution=0s&g0.deduplicate=1&g0.partial_response=0&g0.store_matches=[]), insert the `continuous_app_metric0` metric with a 1-year graph time range, and click on `Enable Store Filtering`.
+
+Let's try querying with `Max 5m downsampling` selected: it uses the 5m-resolution data, which is faster to query than the raw data. Downsampled data is created in addition to the raw data, and downsampling is never done on **young** data.
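+
+The same choice can be made programmatically. The sketch below assumes the Querier's Prometheus-compatible API on port 9091 accepts the `max_source_resolution` parameter that the UI links above already use (and GNU `date` for the timestamps):
+
+```
+# Raw data for the whole year.
+curl -sG 'http://127.0.0.1:9091/api/v1/query_range' \
+  --data-urlencode 'query=continuous_app_metric0' \
+  --data-urlencode "start=$(date -d '1 year ago' +%s)" \
+  --data-urlencode "end=$(date +%s)" \
+  --data-urlencode 'step=1h' \
+  --data-urlencode 'max_source_resolution=0s' >/dev/null
+
+# Same query, but allow the 5m downsampled data to be used.
+curl -sG 'http://127.0.0.1:9091/api/v1/query_range' \
+  --data-urlencode 'query=continuous_app_metric0' \
+  --data-urlencode "start=$(date -d '1 year ago' +%s)" \
+  --data-urlencode "end=$(date +%s)" \
+  --data-urlencode 'step=1h' \
+  --data-urlencode 'max_source_resolution=5m' >/dev/null
+```{{execute}}
+
+Prefix each command with `time` if you want to compare how long the two variants take.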
+
+## Unlimited Retention - Not Challenging Anymore?
+
+Long-term metric retention for Prometheus always involved lots of complexity, disk space, and manual work. With Thanos, you can make Prometheus almost stateless, while keeping most of the data in durable and cheap object storage.
+
+## Next
+
+Awesome work! Feel free to play with the setup πŸ€—
+
+Once done, hit `Continue` for the summary.
\ No newline at end of file