diff --git a/Dockerfile b/Dockerfile
index b9500ca..1d84802 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,14 +17,17 @@ ENV CRAWL_GIT_REPO=https://github.com/skylenet/discv4-dns-lists.git \
     CRAWL_DNS_PUBLISH_CLOUDFLARE=false \
     CLOUDFLARE_API_TOKEN="" \
     CLOUDFLARE_ZONE_ID="" \
-    CRAWL_PUBLISH_METRICS=false \
+    INFLUXDB_METRICS_ENABLED=false \
     INFLUXDB_URL=http://localhost:8086 \
     INFLUXDB_DB=metrics \
     INFLUXDB_USER=user \
-    INFLUXDB_PASSWORD=password
+    INFLUXDB_PASSWORD=password \
+    PROMETHEUS_METRICS_ENABLED=true \
+    PROMETHEUS_METRICS_LISTEN=0.0.0.0:9100
 
 RUN apt-get update && apt-get install -y --no-install-recommends git curl jq
 
+EXPOSE 9100
 WORKDIR /crawler
 ADD run.sh .
 CMD ["./run.sh"]
diff --git a/README.md b/README.md
index be2509a..0589515 100644
--- a/README.md
+++ b/README.md
@@ -41,16 +41,18 @@ Name | Default | Description
 `CRAWL_DNS_PUBLISH_CLOUDFLARE` | `false` | Publish the TXT records to a DNS zone on Cloudflare
 `CLOUDFLARE_API_TOKEN`| `` | API token used for the Cloudflare API
 `CLOUDFLARE_ZONE_ID` | `` | Cloudflare DNS zone identifier. This is the zone where the records will be published to.
-`CRAWL_PUBLISH_METRICS` | `false` | Set to `true` if you want to send metrics to InfluxDB
+`INFLUXDB_METRICS_ENABLED` | `false` | Set to `true` if you want to send metrics to InfluxDB
 `INFLUXDB_URL` | `http://localhost:8086` | Address of the InfluxDB API
 `INFLUXDB_DB` | `metrics` | Database name
 `INFLUXDB_USER` | `user` | Username for InfluxDB
 `INFLUXDB_PASSWORD` | `password` | Password for InfluxDB
+`PROMETHEUS_METRICS_ENABLED` | `true` | Enable the Prometheus metrics endpoint
+`PROMETHEUS_METRICS_LISTEN` | `0.0.0.0:9100` | Listen address (`host:port`) of the Prometheus metrics server
 
 ### Building the image
 
 ```sh
-$ docker build -t disc4-crawl .
+$ docker build -t discv4-crawl .
 ```
 
 ### Run examples
diff --git a/run.sh b/run.sh
index 69e1f52..95ba3ba 100755
--- a/run.sh
+++ b/run.sh
@@ -21,12 +21,16 @@ CRAWL_DNS_PUBLISH_CLOUDFLARE="${CRAWL_DNS_PUBLISH_CLOUDFLARE-false}"
 CLOUDFLARE_API_TOKEN="${CLOUDFLARE_API_TOKEN-}"
 CLOUDFLARE_ZONE_ID="${CLOUDFLARE_ZONE_ID-}"
 
-CRAWL_PUBLISH_METRICS="${CRAWL_PUBLISH_METRICS:-false}"
+INFLUXDB_METRICS_ENABLED="${INFLUXDB_METRICS_ENABLED:-false}"
 INFLUXDB_URL="${INFLUXDB_URL:-http://localhost:8086}"
 INFLUXDB_DB="${INFLUXDB_DB:-metrics}"
 INFLUXDB_USER="${INFLUXDB_USER:-user}"
 INFLUXDB_PASSWORD="${INFLUXDB_PASSWORD:-password}"
+PROMETHEUS_METRICS_ENABLED="${PROMETHEUS_METRICS_ENABLED:-true}"
+PROMETHEUS_METRICS_LISTEN="${PROMETHEUS_METRICS_LISTEN:-0.0.0.0:9100}"
+
+prometheus_metrics_dir=$(mktemp -d)
 
 set -xe
 
 geth_src="$PWD/go-ethereum"
@@ -114,7 +118,7 @@ git_push_crawler_output() {
   fi
 }
 
-publish_metrics() {
+publish_influx_metrics() {
   echo -n "" > metrics.txt
   for D in *."${CRAWL_DNS_DOMAIN}"; do
     if [ -d "${D}" ]; then
@@ -130,6 +134,23 @@ publish_metrics() {
   rm metrics.txt
 }
 
+init_prometheus_metrics() {
+  go install -v github.com/projectdiscovery/simplehttpserver/cmd/simplehttpserver@v0.0.6
+  simplehttpserver -listen "${PROMETHEUS_METRICS_LISTEN}" -path "${prometheus_metrics_dir}" -silent &
+  publish_prometheus_metrics
+}
+
+publish_prometheus_metrics() {
+  prometheus_metrics_file="${prometheus_metrics_dir}/metrics"
+  echo -n "" > "${prometheus_metrics_file}"
+  for D in *."${CRAWL_DNS_DOMAIN}"; do
+    if [ -d "${D}" ]; then
+      LEN=$(jq length < "${D}/nodes.json")
+      echo "devp2p_discv4_dns_nodes{domain=\"${D}\"} ${LEN}" >> "${prometheus_metrics_file}"
+    fi
+  done
+}
+
 # Main execution
 
 git config --global user.email "$CRAWL_GIT_EMAIL"
@@ -139,6 +160,8 @@ git_update_repo "$CRAWL_GIT_REPO" output "$CRAWL_GIT_BRANCH"
 PATH="$geth_src:$PATH"
 cd output
 
+init_prometheus_metrics
+
 while true
 do
   # Pull changes from go-ethereum.
@@ -166,11 +189,13 @@
   fi
 
   # Publish metrics
-  if [ "$CRAWL_PUBLISH_METRICS" = true ] ; then
-    publish_metrics
+  if [ "$INFLUXDB_METRICS_ENABLED" = true ] ; then
+    publish_influx_metrics
+  fi
+  if [ "$PROMETHEUS_METRICS_ENABLED" = true ] ; then
+    publish_prometheus_metrics
   fi
 
-  # Publish DNS records
   if [ "$CRAWL_RUN_ONCE" = true ] ; then
     echo "Ran once. Job is done. Exiting..."
     break
@@ -180,3 +205,6 @@
   echo "Waiting $CRAWL_INTERVAL seconds for the next run..."
   sleep "$CRAWL_INTERVAL"
 done
+
+# Kill all background jobs
+kill $(jobs -p)