diff --git a/tutorials/katacoda/thanos-pathway.json b/tutorials/katacoda/thanos-pathway.json index 02f647ae89..5b4b1ab3df 100644 --- a/tutorials/katacoda/thanos-pathway.json +++ b/tutorials/katacoda/thanos-pathway.json @@ -11,7 +11,7 @@ { "course_id": "2-lts", "title": "Intro: Downsampling and unlimited metric retention for Prometheus", - "description": "In progress. Stay Tuned!" + "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos." }, { "course_id": "3-meta-monitoring", diff --git a/tutorials/katacoda/thanos/2-lts/courseBase.sh b/tutorials/katacoda/thanos/2-lts/courseBase.sh index f1f641af19..cc628b61a3 100644 --- a/tutorials/katacoda/thanos/2-lts/courseBase.sh +++ b/tutorials/katacoda/thanos/2-lts/courseBase.sh @@ -1 +1,8 @@ #!/usr/bin/env bash + +docker pull minio/minio:RELEASE.2019-01-31T00-31-19Z +docker pull quay.io/prometheus/prometheus:v2.20.0 +docker pull quay.io/thanos/thanos:v0.16.0 +docker pull quay.io/thanos/thanosbench:v0.2.0-rc.1 + +mkdir /root/editor diff --git a/tutorials/katacoda/thanos/2-lts/finish.md b/tutorials/katacoda/thanos/2-lts/finish.md new file mode 100644 index 0000000000..eee4b96ffc --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/finish.md @@ -0,0 +1,17 @@ +# Summary + +Congratulations! 🎉🎉🎉 +You completed our second Thanos tutorial. Let's summarize what we learned: + +* To preserve the data beyond Prometheus' regular retention time, we used an object storage system for backing up our historical data. +* The Thanos Store component acts as a data retrieval proxy for data inside our object storage. +* With Sidecar uploading metric blocks to the object store as soon as they are written to disk, it keeps the “scraper” (Prometheus with Thanos Sidecar) lightweight. This simplifies maintenance, cost, and system design. +* Thanos Compactor improved query efficiency and also reduced the required storage size.
+ +See the next courses for other tutorials about different deployment models and more advanced features of Thanos! + +### Feedback + +Do you see any bug or typo in the tutorial, or do you have some feedback for us? + +Let us know on https://github.com/thanos-io/thanos or #thanos slack channel linked on https://thanos.io \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/index.json b/tutorials/katacoda/thanos/2-lts/index.json index 35d2f8f836..fc74b70d58 100644 --- a/tutorials/katacoda/thanos/2-lts/index.json +++ b/tutorials/katacoda/thanos/2-lts/index.json @@ -1,17 +1,53 @@ { "title": "Intro: Downsampling and unlimited metric retention for Prometheus", - "description": "Learn how to extend you metric retention in a cheap way with Thanos.", + "description": "Learn how to extend your metric retention in a cheap and easy way with Thanos.", + "difficulty": "Moderate", "details": { "steps": [ + { + "title": "Configuring Initial Prometheus Server", + "text": "step1.md", + "verify": "step1-verify.sh" + }, + { + "title": "Thanos Sidecars", + "text": "step2.md", + "verify": "step2-verify.sh" + }, + { + "title": "Thanos Store Gateway", + "text": "step3.md", + "answer": "step3-answer.md" + }, + { + "title": "Thanos Compactor", + "text": "step4.md" + } ], "intro": { "text": "intro.md", - "courseData": "courseBase.sh" + "courseData": "courseBase.sh", + "credits": "https://thanos.io" + }, + "finish": { + "text": "finish.md", + "credits": "test" } }, + "files": [ + "prometheus0_eu1.yml", + "bucket_storage.yaml" + ], "environment": { "uilayout": "editor-terminal", - "uisettings": "yaml" + "uisettings": "yaml", + "uieditorpath": "/root/editor", + "showdashboard": true, + "dashboards": [ + {"name": "Prometheus 0 EU1", "port": 9090}, + {"name": "Minio", "port": 9000}, + {"name": "Thanos Query", "port": 9091} + ] }, "backend": { "imageid": "docker-direct" diff --git a/tutorials/katacoda/thanos/2-lts/intro.md b/tutorials/katacoda/thanos/2-lts/intro.md index
26a56510f0..eeb8e58273 100644 --- a/tutorials/katacoda/thanos/2-lts/intro.md +++ b/tutorials/katacoda/thanos/2-lts/intro.md @@ -1,7 +1,30 @@ -# In progress +# Intro: Downsampling and unlimited metric retention for Prometheus -🚧 This tutorial is in progress. 🚧 +They say that [Thanos](https://thanos.io) is a set of components that can be composed into a highly available metric system with **unlimited storage capacity** +and that it can be added **seamlessly** on top of existing Prometheus deployments. 🤔🤔 -Do you want to see this soon or you want to help us? +In this course you can experience all of this yourself. -Let us know on https://github.com/thanos-io/thanos or #thanos slack channel linked on https://thanos.io \ No newline at end of file +In this tutorial, you will learn about: + +* How to start uploading your Prometheus data seamlessly to cheap object storage thanks to Thanos sidecar. +* How to further query your data in object storage thanks to Thanos Store Gateway. +* How to query both fresh and older data in an easy way through Thanos Querier. + +All of this allows you to keep your metrics in cheap and reliable object storage, allowing virtually unlimited metric retention for Prometheus. + +> NOTE: This course uses docker containers with pre-built Thanos, Prometheus, and Minio Docker images available publicly. +> However, a similar scenario will work with any other deployment method like Kubernetes or systemd, etc. + +### Prerequisites + +Please complete the first intro course about GlobalView before jumping into this one! 🤗 + +### Feedback + +Do you see any bug or typo in the tutorial, or do you have some feedback for us?
+Let us know on https://github.com/thanos-io/thanos or #thanos slack channel linked on https://thanos.io + +### Contributed by: + +* Sonia Singla [@soniasingla](http://github.com/soniasingla) \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/query.png b/tutorials/katacoda/thanos/2-lts/query.png new file mode 100644 index 0000000000..5561402330 Binary files /dev/null and b/tutorials/katacoda/thanos/2-lts/query.png differ diff --git a/tutorials/katacoda/thanos/2-lts/step1-verify.sh b/tutorials/katacoda/thanos/2-lts/step1-verify.sh new file mode 100644 index 0000000000..7e9a0cd6c2 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step1-verify.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1 + +echo '"done"' diff --git a/tutorials/katacoda/thanos/2-lts/step1.md b/tutorials/katacoda/thanos/2-lts/step1.md new file mode 100644 index 0000000000..d3649325c7 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step1.md @@ -0,0 +1,145 @@ +# Step 1 - Initial Prometheus Setup + +In this tutorial, we will mimic the usual state with a Prometheus server running for... a year! +We will use it to seamlessly back up all old data in the object storage and configure Prometheus for continuous backup mode, which +will allow us to cost-effectively achieve unlimited retention for Prometheus. + +Last but not least, we will go through setting all up for querying and automated maintenance (e.g. compactions, retention and downsampling). + +In order to showcase all of this, let's start with the single cluster setup from the previous course. Let's start this initial Prometheus setup, ready? + +## Generate Artificial Metrics for 1 year + +Actually, before starting Prometheus, let's generate some **artificial data**.
You most likely want to learn about Thanos fast, +so you probably don't have months to wait for this tutorial until Prometheus collects months of metrics, do you? (: + +We will use our handy [thanosbench](https://github.com/thanos-io/thanosbench) project to do so! Let's generate Prometheus +data (in the form of TSDB blocks) with just 5 series (gauges) that span from a year ago until now (-6h)! + +Execute the following command (should take a few seconds): + +``` +mkdir -p /root/prom-eu1 && docker run -i quay.io/thanos/thanosbench:v0.2.0-rc.1 block plan -p continuous-365d-tiny --labels 'cluster="eu1"' --max-time=6h | docker run -v /root/prom-eu1:/prom-eu1 -i quay.io/thanos/thanosbench:v0.2.0-rc.1 block gen --output.dir prom-eu1 +```{{execute}} + +On successful block creation you should see the following log lines: + +``` +level=info ts=2020-10-20T18:28:42.625041939Z caller=block.go:87 msg="all blocks done" count=X +level=info ts=2020-10-20T18:28:42.625100758Z caller=main.go:118 msg=exiting cmd="block gen" +``` + +Run `ls -lR /root/prom-eu1` to see dozens of generated TSDB blocks. + +## Prometheus Configuration File + +Here, we will prepare configuration files for the Prometheus instance that will run with our pre-generated data. +It will also scrape the components we will use in this tutorial. + +Click `Copy To Editor` for config to propagate the configs to file. + +
+global: + scrape_interval: 5s + external_labels: + cluster: eu1 + replica: 0 + tenant: team-eu # Not needed, but a good practice if you want to grow this to multi-tenant system some day. + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['127.0.0.1:9090'] + - job_name: 'sidecar' + static_configs: + - targets: ['127.0.0.1:19090'] + - job_name: 'minio' + metrics_path: /minio/prometheus/metrics + static_configs: + - targets: ['127.0.0.1:9000'] + - job_name: 'querier' + static_configs: + - targets: ['127.0.0.1:9091'] + - job_name: 'store_gateway' + static_configs: + - targets: ['127.0.0.1:19092'] ++ +## Starting Prometheus Instance + +Let's now start the container representing Prometheus instance. + +Note `-v /root/prom-eu1:/prometheus \` and `--storage.tsdb.path=/prometheus` that allows us to place our generated data +in Prometheus data directory. + +Let's deploy Prometheus now. Note that we disabled local Prometheus compactions `storage.tsdb.max-block-duration` and `min` flags. +Currently, this is important for the basic object storage backup scenario to avoid conflicts between the bucket and local compactions. +Read more [here](https://thanos.io/tip/components/sidecar.md/#sidecar). + +We also extend Prometheus retention: `--storage.tsdb.retention.time=1000d`. This is because Prometheus by default removes all data older +than 2 weeks. 
And we have a year (: + +### Deploying "EU1" + +``` +docker run -d --net=host --rm \ + -v /root/editor/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \ + -v /root/prom-eu1:/prometheus \ + -u root \ + --name prometheus-0-eu1 \ + quay.io/prometheus/prometheus:v2.20.0 \ + --config.file=/etc/prometheus/prometheus.yml \ + --storage.tsdb.retention.time=1000d \ + --storage.tsdb.path=/prometheus \ + --storage.tsdb.max-block-duration=2h \ + --storage.tsdb.min-block-duration=2h \ + --web.listen-address=:9090 \ + --web.external-url=https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com \ + --web.enable-lifecycle \ + --web.enable-admin-api +```{{execute}} + +## Setup Verification + +Once started you should be able to reach the Prometheus instance here and query... 1 year of data! + +* [Prometheus-0 EU1](https://[[HOST_SUBDOMAIN]]-9090-[[KATACODA_HOST]].environments.katacoda.com/) + +## Thanos Sidecar & Querier + +Similar to the previous course, let's set up global view querying with sidecar: + +``` +docker run -d --net=host --rm \ + --name prometheus-0-eu1-sidecar \ + -u root \ + quay.io/thanos/thanos:v0.16.0 \ + sidecar \ + --http-address 0.0.0.0:19090 \ + --grpc-address 0.0.0.0:19190 \ + --prometheus.url http://127.0.0.1:9090 +```{{execute}} + +And Querier. As you remember [Thanos sidecar](https://thanos.io/tip/components/sidecar.md/) exposes `StoreAPI` +so we will make sure we point the Querier to the gRPC endpoints of the sidecar: + +``` +docker run -d --net=host --rm \ + --name querier \ + quay.io/thanos/thanos:v0.16.0 \ + query \ + --http-address 0.0.0.0:9091 \ + --query.replica-label replica \ + --store 127.0.0.1:19190 +```{{execute}} + +## Setup verification + +Similar to the previous course, let's check if the Querier works as intended. Let's look at +[Querier UI `Store` page](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com/stores). + +This should list the sidecar, including the external labels.
+ +On the graph you should also see our 5 series for 1y time, thanks to Prometheus and sidecar StoreAPI: [Graph](https://[[HOST_SUBDOMAIN]]-9091-[[KATACODA_HOST]].environments.katacoda.com). + +Click `Continue` to see how we can move this data to much cheaper and easier to operate object storage. \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step2-verify.sh b/tutorials/katacoda/thanos/2-lts/step2-verify.sh new file mode 100644 index 0000000000..68bdd69733 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step2-verify.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1 + +echo '"done"' diff --git a/tutorials/katacoda/thanos/2-lts/step2.md b/tutorials/katacoda/thanos/2-lts/step2.md new file mode 100644 index 0000000000..81a399a5e0 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step2.md @@ -0,0 +1,97 @@ +# Step 2 - Object Storage Continuous Backup + +Maintaining one year of data within your Prometheus is doable, but not easy. It's tricky to +resize, back up or maintain this data long term. On top of that Prometheus does not do any replication, +so any unavailability of Prometheus results in query unavailability. + +This is where Thanos comes into play. With a single configuration change we can allow Thanos Sidecar to continuously upload blocks of metrics +that are periodically persisted to disk by Prometheus. + +> NOTE: Prometheus when scraping data, initially aggregates all samples in memory and WAL (on-disk write-ahead-log). Only after 2-3h it "compacts" +> the data into disk in form of 2h TSDB block. This is why we need to still query Prometheus for latest data, but overall with this change +> we can keep Prometheus retention to minimum.
It's recommended to keep Prometheus retention in this case at least 6 hours long, to have a safe buffer +> for a potential event of network partition. + +## Starting Object Storage: Minio + +Let's start a simple S3-compatible Minio engine that keeps data on local disk: + +``` +mkdir /root/minio && \ +docker run -d --rm --name minio \ + -v /root/minio:/data \ + -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=melovethanos" \ + minio/minio:RELEASE.2019-01-31T00-31-19Z \ + server /data +```{{execute}} + +Create `thanos` bucket: + +``` +mkdir /root/minio/thanos +```{{execute}} + +## Verification + +To check if the Minio is working as intended, let's [open Minio server UI](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/) + +Enter the credentials as mentioned below: + +**Access Key** = `minio` +**Secret Key** = `melovethanos` + +## Sidecar block backup + +All Thanos components that use object storage use the same `objstore.config` flag with the same "little" bucket config format. + +Click `Copy To Editor` for config to propagate the configs to the file `bucket_storage.yaml`: + +
+type: S3 +config: + bucket: "thanos" + endpoint: "127.0.0.1:9000" + insecure: true + signature_version2: true + access_key: "minio" + secret_key: "melovethanos" ++ +Let's restart sidecar with updated configuration in backup mode. + +``` +docker stop prometheus-0-eu1-sidecar +```{{execute}} + +[Thanos sidecar](https://thanos.io/tip/components/sidecar.md/) allows us to back up all the blocks that Prometheus persists to +the disk. In order to accomplish this we need to make sure that: + +* Sidecar has direct access to the Prometheus data directory (in our case host's /root/prom-eu1 dir) (`--tsdb.path` flag) +* Bucket configuration is specified `--objstore.config-file` +* `--shipper.upload-compacted` has to be set if you want to upload already compacted blocks when sidecar starts. Use this only +when you want to upload blocks never seen before on new Prometheus introduced to Thanos system. + +Let's run sidecar: + +``` +docker run -d --net=host --rm \ + -v /root/editor/bucket_storage.yaml:/etc/thanos/minio-bucket.yaml \ + -v /root/prom-eu1:/prometheus \ + --name prometheus-0-eu1-sidecar \ + -u root \ + quay.io/thanos/thanos:v0.16.0 \ + sidecar \ + --tsdb.path /prometheus \ + --objstore.config-file /etc/thanos/minio-bucket.yaml \ + --shipper.upload-compacted \ + --http-address 0.0.0.0:19090 \ + --grpc-address 0.0.0.0:19190 \ + --prometheus.url http://127.0.0.1:9090 +```{{execute}} + +## Verification + +We can check whether the data is uploaded into `thanos` bucket by visiting [Minio](https://[[HOST_SUBDOMAIN]]-9000-[[KATACODA_HOST]].environments.katacoda.com/minio/). +It will take a couple of seconds to synchronize all blocks. + +Once all blocks appear in the minio `thanos` bucket, we are sure our data is backed up. Awesome!
💪 diff --git a/tutorials/katacoda/thanos/2-lts/step3-answer.md b/tutorials/katacoda/thanos/2-lts/step3-answer.md new file mode 100644 index 0000000000..40ab7c2f64 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step3-answer.md @@ -0,0 +1,19 @@ +## Answer + +**In an HA Prometheus setup with Thanos sidecars, would there be issues with multiple sidecars attempting to upload the same data blocks to object storage?** + +This is handled by having unique **external labels** for all Prometheus, sidecar instances and HA replicas. To indicate that all replicas are storing the same targets, they differ only in one label. + +For instance, consider the situation below: + +``` +First: +"cluster": "prod1" +"replica": "0" + +Second: +"cluster":"prod1" +"replica": "1" +``` + +There is no problem with storing them since the label sets are **unique**. \ No newline at end of file diff --git a/tutorials/katacoda/thanos/2-lts/step3-verify.sh b/tutorials/katacoda/thanos/2-lts/step3-verify.sh new file mode 100644 index 0000000000..1bdfd136b7 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step3-verify.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +curl -s 127.0.0.1:9090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1 +curl -s 127.0.0.1:9091/metrics >/dev/null || exit 1 + +curl -s 127.0.0.1:19090/metrics >/dev/null || exit 1 + +curl -s 127.0.0.1:19091/metrics >/dev/null || exit 1 + +echo '"done"' diff --git a/tutorials/katacoda/thanos/2-lts/step3.md b/tutorials/katacoda/thanos/2-lts/step3.md new file mode 100644 index 0000000000..c04446a9c9 --- /dev/null +++ b/tutorials/katacoda/thanos/2-lts/step3.md @@ -0,0 +1,68 @@ +# Step 3 - Fetching metrics from Bucket + +In this step, we will learn about Thanos Store Gateway and how to deploy it.
+ +## Thanos Components + +Let's take a look at all the Thanos commands: + +```docker run --rm quay.io/thanos/thanos:v0.16.0 --help```{{execute}} + +You should see multiple commands that solve different purposes, block storage based long-term storage for Prometheus. + +In this step we will focus on thanos `store gateway`: + +``` + store [