Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨(backend) Adding /prometheus metrics endpoint #455 #476

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ and this project adheres to

## [Unreleased]

## Added

- ✨(backend) Adding /prometheus metrics endpoint #455

## [1.8.2] - 2024-11-28

Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ services:
environment:
- PYLINTHOME=/app/.pylint.d
- DJANGO_CONFIGURATION=Development
- PROMETHEUS_EXPORTER=true
- K8S_PROBING=true
- MONITORING_ALLOWED_CIDR_RANGES="172.23.0.0/16" # separate by comma
env_file:
- env.d/development/common
- env.d/development/postgresql
Expand Down
100 changes: 100 additions & 0 deletions src/backend/core/api/custom_metrics_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from prometheus_client.core import GaugeMetricFamily
from django.utils.timezone import now
from django.db.models import Count, Min, Max, Q, F
from datetime import timedelta
from core import models
from django.conf import settings


class CustomMetricsExporter:
    """
    Prometheus collector that reports user and document statistics as gauges.
    """

    def collect(self):
        """
        Run the statistics queries and yield one gauge family per statistic.

        Metric names are prefixed with ``settings.PROMETHEUS_METRIC_NAMESPACE``
        when that setting exists and is non-empty. Every value is computed
        before the first metric is yielded.
        """
        namespace = getattr(settings, "PROMETHEUS_METRIC_NAMESPACE", "")

        def metric_name(name):
            # Only add the "<namespace>_" prefix when a namespace is set.
            if not namespace:
                return name
            return f"{namespace}_{name}"

        def gauge(name, documentation, value):
            # Single-sample gauge with a namespaced name.
            return GaugeMetricFamily(metric_name(name), documentation, value=value)

        def active_users_since(threshold):
            # A user is counted when a document access was updated, a link
            # trace was created, or a login happened at or after `threshold`.
            recently_active = (
                Q(documentaccess__updated_at__gte=threshold)
                | Q(link_traces__created_at__gte=threshold)
                | Q(last_login__gte=threshold)
            )
            return models.User.objects.filter(recently_active).distinct().count()

        def documents_updated_since(threshold):
            return models.Document.objects.filter(updated_at__gte=threshold).count()

        # Reference points: midnight of the current day, then 7 and 30 days back.
        midnight = now().replace(hour=0, minute=0, second=0, microsecond=0)
        seven_days_back = midnight - timedelta(days=7)
        thirty_days_back = midnight - timedelta(days=30)

        # User statistics
        n_users = models.User.objects.count()
        n_active_today = active_users_since(midnight)
        n_active_week = active_users_since(seven_days_back)
        n_active_month = active_users_since(thirty_days_back)

        # Document statistics
        n_documents = models.Document.objects.count()
        # "Shared" means the document has more than one access row.
        n_shared = (
            models.Document.objects
            .annotate(access_count=Count("accesses"))
            .filter(access_count__gt=1)
            .count()
        )
        n_docs_today = models.Document.objects.filter(
            updated_at__gte=midnight,
            updated_at__lt=midnight + timedelta(days=1),
        ).count()
        n_docs_week = documents_updated_since(seven_days_back)
        n_docs_month = documents_updated_since(thirty_days_back)

        first_created = models.Document.objects.aggregate(
            oldest=Min("created_at")
        )["oldest"]
        last_created = models.Document.objects.aggregate(
            newest=Max("created_at")
        )["newest"]

        # Lazy queryset; it is evaluated when iterated further down.
        per_user_counts = models.DocumentAccess.objects.values("user_id").annotate(
            doc_count=Count("document_id"),
            admin_email=F("user__admin_email"),
        )

        families = [
            gauge("total_users", "Total number of users", n_users),
            gauge("active_users_today", "Number of active users today", n_active_today),
            gauge("active_users_7_days", "Number of active users in the last 7 days", n_active_week),
            gauge("active_users_30_days", "Number of active users in the last 30 days", n_active_month),
            gauge("total_documents", "Total number of documents", n_documents),
            gauge("shared_documents", "Number of shared documents", n_shared),
            gauge("active_documents_today", "Number of active documents today", n_docs_today),
            gauge("active_documents_7_days", "Number of active documents in the last 7 days", n_docs_week),
            gauge("active_documents_30_days", "Number of active documents in the last 30 days", n_docs_month),
        ]

        # The date gauges are only emitted when the aggregate returned a value.
        if first_created:
            families.append(
                gauge(
                    "oldest_document_date",
                    "Timestamp of the oldest document creation date",
                    first_created.timestamp(),
                )
            )
        if last_created:
            families.append(
                gauge(
                    "newest_document_date",
                    "Timestamp of the newest document creation date",
                    last_created.timestamp(),
                )
            )

        # One labelled sample per user; rows without an email are skipped.
        distribution = GaugeMetricFamily(
            metric_name("user_document_distribution"),
            "Document counts per user",
            labels=["user_email"],
        )
        for row in per_user_counts:
            email = row["admin_email"]
            if email:
                distribution.add_metric([email], row["doc_count"])
        families.append(distribution)

        yield from families
92 changes: 92 additions & 0 deletions src/backend/core/api/custom_probe_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import uuid
import requests
from django.http import JsonResponse, HttpResponseServerError, HttpResponse
from django.db import connections
from django.db.utils import OperationalError
from django.core.cache import cache
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile

from impress import settings


def liveness_check(request):
    """
    Liveness probe endpoint.

    Responds with HTTP 200 and ``{"status": "OK"}``. If building that
    response raises, responds with HTTP 500 and the error message instead.
    """
    try:
        response = JsonResponse({"status": "OK"}, status=200)
    except Exception as exc:
        response = JsonResponse({"status": "Error", "message": str(exc)}, status=500)
    return response


def readiness_check(request):
    """
    Readiness probe endpoint.

    Checks database, cache, media storage, and OIDC configuration, in that
    order, stopping at the first failure.

    Returns HTTP 200 with JSON status "OK" if all checks pass,
    or HTTP 500 with JSON status "Error" and an error message.
    """

    def check_database():
        """Check database connectivity by opening a cursor on the default connection."""
        try:
            # The context manager closes the cursor again, so repeated probe
            # calls do not leave cursors open.
            with connections["default"].cursor():
                pass
        except OperationalError as e:
            raise Exception(f"Database check failed: {e}") from e

    def check_cache():
        """Check cache connectivity with a short-lived write/read round trip."""
        test_key = "readiness-probe"
        test_value = "ready"
        cache.set(test_key, test_value, timeout=5)
        if cache.get(test_key) != test_value:
            raise Exception("Cache check failed: Value mismatch or cache unavailable")

    def check_media_storage():
        """Check the default storage backend by saving and deleting a small file."""
        test_file_name = f"readiness-check-{uuid.uuid4()}.txt"
        test_content = ContentFile(b"readiness check")
        try:
            # save() returns the name the backend actually stored the file
            # under, which may differ from the requested one; delete that
            # name so the probe never leaves a file behind.
            stored_name = default_storage.save(test_file_name, test_content)
            default_storage.delete(stored_name)
        except Exception as e:
            # Any failure during save or delete marks the storage as not ready
            raise Exception(f"Media storage check failed: {e}") from e

    def check_oidc():
        """Check that the OIDC endpoints are configured and reachable."""
        # NOTE(review): the settings hunk in this change defines settings on
        # `class Base(Configuration)`, so the module-level `impress.settings`
        # import presumably does not expose them as attributes - confirm.
        # django.conf.settings always reflects the effective configuration.
        from django.conf import settings as django_settings

        endpoint_names = (
            "OIDC_OP_JWKS_ENDPOINT",
            "OIDC_OP_TOKEN_ENDPOINT",
            "OIDC_OP_USER_ENDPOINT",
        )
        # An absent setting is reported the same way as an empty one.
        required_endpoints = [
            (name, getattr(django_settings, name, None)) for name in endpoint_names
        ]

        missing_endpoints = [name for name, url in required_endpoints if not url]
        if missing_endpoints:
            raise Exception(f"Missing OIDC configuration for: {', '.join(missing_endpoints)}")

        for name, url in required_endpoints:
            try:
                # Just ensure the endpoint responds, no matter the HTTP status code
                requests.get(url, timeout=5)
            except requests.RequestException as e:
                raise Exception(f"Failed to reach {name} ({url}): {e}") from e

    try:
        # Run all checks; the first failure aborts the sequence
        check_database()
        check_cache()
        check_media_storage()
        check_oidc()
    except Exception as e:
        # Report the failure to the prober
        return JsonResponse({"status": "Error", "message": str(e)}, status=500)

    # All checks passed
    return JsonResponse({"status": "OK"}, status=200)
47 changes: 47 additions & 0 deletions src/backend/core/api/decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
from ipaddress import ip_network, ip_address
from django.http import HttpResponseForbidden


def monitoring_cidr_protected_view(view):
    """
    Decorator to protect a view with a CIDR filter.

    CIDR ranges are read once, at decoration time, from the environment
    variable `MONITORING_ALLOWED_CIDR_RANGES` (comma-separated; each entry may
    be wrapped in single or double quotes).
    If set to '*', all clients are allowed. If not set or empty, access is denied.

    Raises:
        ValueError: if an entry is not a valid CIDR range.
    """
    from functools import wraps

    raw_value = os.environ.get("MONITORING_ALLOWED_CIDR_RANGES", "")

    # Normalise every entry the same way (whitespace and surrounding quotes
    # removed) BEFORE looking for the wildcard, so a quoted "*" is recognised
    # instead of being handed to ip_network().
    entries = [
        entry.strip().strip('"').strip("'").strip()
        for entry in raw_value.split(",")
    ]
    entries = [entry for entry in entries if entry]

    # The wildcard only opens access when it is the sole entry; a "*" mixed
    # with real ranges is ignored.
    allow_all = entries == ["*"]

    try:
        allowed_cidr_ranges = [
            ip_network(entry) for entry in entries if entry != "*"
        ]
    except ValueError as e:
        raise ValueError(f"Invalid CIDR range in MONITORING_ALLOWED_CIDR_RANGES: {e}") from e

    @wraps(view)
    def wrapped_view(request, *args, **kwargs):
        # Allow all clients if explicitly configured
        if allow_all:
            return view(request, *args, **kwargs)

        # If no CIDR ranges are configured, deny access
        if not allowed_cidr_ranges:
            return HttpResponseForbidden("Access denied: No allowed CIDR ranges configured.")

        # A missing or malformed REMOTE_ADDR is denied rather than raising
        # and surfacing as a server error.
        try:
            client_ip = ip_address(request.META.get("REMOTE_ADDR") or "")
        except ValueError:
            return HttpResponseForbidden("Access denied: Your IP is not allowed.")

        # Check if the client's IP is in the allowed CIDR ranges
        if not any(client_ip in cidr for cidr in allowed_cidr_ranges):
            return HttpResponseForbidden("Access denied: Your IP is not allowed.")

        # Proceed to the original view
        return view(request, *args, **kwargs)

    return wrapped_view
15 changes: 14 additions & 1 deletion src/backend/impress/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join("/", "data")
PROMETHEUS_EXPORTER = os.getenv("PROMETHEUS_EXPORTER", "False").lower() == "true"


def get_release():
Expand Down Expand Up @@ -282,6 +283,14 @@ class Base(Configuration):
"dockerflow.django.middleware.DockerflowMiddleware",
]

if PROMETHEUS_EXPORTER:
MIDDLEWARE.insert(0, "django_prometheus.middleware.PrometheusBeforeMiddleware")
MIDDLEWARE.append("django_prometheus.middleware.PrometheusAfterMiddleware")
PROMETHEUS_METRIC_NAMESPACE = "impress"
PROMETHEUS_LATENCY_BUCKETS = (
.05, .1, .25, .5, .75, 1.0, 1.5, 2.5, 5.0, 10.0, 15.0, 30.0, float("inf")
)

AUTHENTICATION_BACKENDS = [
"django.contrib.auth.backends.ModelBackend",
"core.authentication.backends.OIDCAuthenticationBackend",
Expand All @@ -295,6 +304,7 @@ class Base(Configuration):
"drf_spectacular",
# Third party apps
"corsheaders",
"django_prometheus",
"dockerflow.django",
"rest_framework",
"parler",
Expand All @@ -314,7 +324,10 @@ class Base(Configuration):

# Cache
CACHES = {
"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache" if not PROMETHEUS_EXPORTER
else "django_prometheus.cache.backends.locmem.LocMemCache",
},
}

REST_FRAMEWORK = {
Expand Down
22 changes: 22 additions & 0 deletions src/backend/impress/urls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""URL configuration for the impress project"""

import os
from django.conf import settings
from django.conf.urls.static import static
from django.contrib import admin
Expand All @@ -12,11 +13,32 @@
SpectacularSwaggerView,
)

from django_prometheus import exports
from core.api.custom_probe_views import liveness_check, readiness_check
from core.api.decorators import monitoring_cidr_protected_view

urlpatterns = [
path("admin/", admin.site.urls),
path("", include("core.urls")),
]

# Prometheus exporter endpoint: only routed when PROMETHEUS_EXPORTER is "true"
# (case-insensitive), and always wrapped in the CIDR filter.
if os.environ.get("PROMETHEUS_EXPORTER", "False").lower() == "true":
    urlpatterns += [
        path(
            "prometheus/",
            monitoring_cidr_protected_view(exports.ExportToDjangoView),
            name="prometheus-django-metrics",
        ),
    ]

# Liveness and readiness probes: only routed when K8S_PROBING is "true"
# (case-insensitive), behind the same CIDR filter.
if os.environ.get("K8S_PROBING", "False").lower() == "true":
    urlpatterns += [
        path(
            "probes/liveness/",
            monitoring_cidr_protected_view(liveness_check),
            name="liveness-probe",
        ),
        path(
            "probes/readiness/",
            monitoring_cidr_protected_view(readiness_check),
            name="readiness-probe",
        ),
    ]

if settings.DEBUG:
urlpatterns = (
urlpatterns
Expand Down
19 changes: 19 additions & 0 deletions src/backend/impress/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,26 @@

from configurations.wsgi import get_wsgi_application

# Prometheus Metrics Registration
from prometheus_client import REGISTRY
from core.api.custom_metrics_exporter import CustomMetricsExporter


def register_prometheus_exporter():
    """
    Register the custom Prometheus collector on the default registry.

    Does nothing beyond printing a notice when a CustomMetricsExporter
    instance is already among the registry's collectors.
    """
    # The registry's collector mapping is scanned for an existing instance.
    already_registered = False
    for collector in REGISTRY._collector_to_names:
        if isinstance(collector, CustomMetricsExporter):
            already_registered = True
            break

    if already_registered:
        print("Custom Prometheus metrics already registered.")
        return

    REGISTRY.register(CustomMetricsExporter())
    print("Custom Prometheus metrics registered successfully.")

# Default settings module and configuration class, used only when the
# environment does not already define them.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "impress.settings")
os.environ.setdefault("DJANGO_CONFIGURATION", "Development")

# Register the custom Prometheus collector when PROMETHEUS_EXPORTER is "true"
# (case-insensitive).
# NOTE(review): this runs before get_wsgi_application() is called below;
# confirm the collector can be imported and registered before Django has been
# initialised.
if os.environ.get("PROMETHEUS_EXPORTER", "False").lower() == "true":
register_prometheus_exporter()

# WSGI callable picked up by the application server.
application = get_wsgi_application()
1 change: 1 addition & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies = [
"django-countries==7.6.1",
"django-filter==24.3",
"django-parler==2.3",
"django-prometheus==2.3.1",
"redis==5.1.1",
"django-redis==5.4.0",
"django-storages[s3]==1.14.4",
Expand Down