diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 346a1f8..749829e 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -1,3 +1,5 @@
+# GitHub Secrets required by this CI/CD workflow (Repo Settings > Secrets and variables > Actions)
+
 name: cicd
 
 on:
@@ -16,23 +18,32 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Log - Checkout done
+        run: echo ">>> [CI] Checkout done"
+
       - name: Setup JDK 17
         uses: actions/setup-java@v4
         with:
           distribution: temurin
           java-version: "17"
 
+      - name: Log - JDK ready
+        run: echo ">>> [CI] JDK 17 ready"
+
       - name: Setup Gradle cache
         uses: gradle/actions/setup-gradle@v4
 
       - name: Run tests
-        run: ./gradlew test --no-daemon
+        run: |
+          echo ">>> [CI] Running tests..."
+          ./gradlew test --no-daemon
+          echo ">>> [CI] Tests passed"
 
   docker-build-and-push:
     name: Docker Build and Push
     runs-on: ubuntu-latest
     needs: test
-    if: github.event_name != 'pull_request' && github.ref == 'refs/heads/production'
+    if: github.ref == 'refs/heads/production' && github.event_name == 'push'
 
     permissions:
       contents: read
@@ -42,15 +53,24 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Log - Checkout done
+        run: echo ">>> [Docker] Checkout done"
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
+      - name: Log - Buildx ready
+        run: echo ">>> [Docker] Buildx ready"
+
       - name: Login to Docker Hub
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
+      - name: Log - Docker Hub login done
+        run: echo ">>> [Docker] Docker Hub login done"
+
       - name: Extract Docker metadata
         id: meta
         uses: docker/metadata-action@v5
@@ -69,11 +89,18 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
 
+      - name: Log - Image pushed
+        run: echo ">>> [Docker] Image built and pushed"
+
   deploy:
     name: Deploy to GCE
     runs-on: ubuntu-latest
     needs: docker-build-and-push
-    if: github.event_name == 'push' && github.ref == 'refs/heads/production'
+    if: github.ref == 'refs/heads/production' && github.event_name == 'push'
+
+    # Least privilege: GITHUB_TOKEN is forwarded to the VM and is only used to clone the repo.
+    permissions:
+      contents: read
 
     steps:
       - name: Deploy over SSH
@@ -93,15 +120,18 @@ jobs:
           SMTP_USERNAME: ${{ secrets.SMTP_USERNAME }}
           SMTP_PASSWORD: ${{ secrets.SMTP_PASSWORD }}
           CONTAINER_NAME: dorumdorum-be
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO_FULL_NAME: ${{ github.repository }}
         with:
           host: ${{ secrets.GCE_HOST }}
           username: ${{ secrets.GCE_USER }}
           key: ${{ secrets.GCE_SSH_KEY }}
           port: ${{ secrets.GCE_SSH_PORT }}
-          envs: IMAGE,DOCKERHUB_USERNAME,DOCKERHUB_TOKEN,FIREBASE_SERVICE_ACCOUNT_B64,RDB_USERNAME,RDB_URL,RDB_PASSWORD,MONGODB_URI,JWT_KEY,JWT_ACCESS_EXPIRATION,JWT_REFRESH_EXPIRATION,SMTP_USERNAME,SMTP_PASSWORD,CONTAINER_NAME
+          envs: IMAGE,DOCKERHUB_USERNAME,DOCKERHUB_TOKEN,FIREBASE_SERVICE_ACCOUNT_B64,RDB_USERNAME,RDB_URL,RDB_PASSWORD,MONGODB_URI,JWT_KEY,JWT_ACCESS_EXPIRATION,JWT_REFRESH_EXPIRATION,SMTP_USERNAME,SMTP_PASSWORD,CONTAINER_NAME,GITHUB_TOKEN,REPO_FULL_NAME
           script_stop: true
           script: |
             set -e
+            echo ">>> [Deploy] SSH 연결됨, 환경 변수 정규화 중..."
 
             RDB_USERNAME="$(printf '%s' "$RDB_USERNAME" | tr -d '\r\n')"
             RDB_URL="$(printf '%s' "$RDB_URL" | tr -d '\r\n')"
@@ -116,6 +146,7 @@ jobs:
             DEPLOY_PATH="${HOME}/dorumdorum"
             mkdir -p "$DEPLOY_PATH"
             cd "$DEPLOY_PATH"
+            echo ">>> [Deploy] DEPLOY_PATH=$DEPLOY_PATH"
 
             # 1) Firebase 서비스 계정 파일 생성 (VM에 저장)
             printf '%s' "$FIREBASE_SERVICE_ACCOUNT_B64" | base64 -d > firebase-service-account.json
@@ -138,6 +169,7 @@ jobs:
               "SMTP_PASSWORD=$SMTP_PASSWORD" \
               "FIREBASE_SERVICE_ACCOUNT_PATH=$FIREBASE_SERVICE_ACCOUNT_PATH" > .env
             chmod 600 .env
+            echo ">>> [Deploy] .env, firebase-service-account.json 생성 완료"
 
             docker info >/dev/null 2>&1 || {
               echo "Docker socket permission denied for current deploy user."
@@ -146,32 +178,76 @@ jobs:
             }
 
             echo "$DOCKERHUB_TOKEN" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
+            echo ">>> [Deploy] Docker Hub 로그인 완료"
 
+            echo ">>> [Deploy] Redis 이미지 pull 중..."
             docker pull redis:7-alpine
+            echo ">>> [Deploy] Backend 이미지 pull 중..."
             docker pull "$IMAGE"
 
+            echo ">>> [Deploy] 네트워크 확인/생성 중..."
             docker network inspect dorumdorum-net >/dev/null 2>&1 || docker network create dorumdorum-net
 
-            docker stop dorumdorum-redis >/dev/null 2>&1 || true
-            docker rm dorumdorum-redis >/dev/null 2>&1 || true
-            docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
-            docker rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
-
-            docker run -d --name dorumdorum-redis \
-              --restart unless-stopped \
-              --network dorumdorum-net \
-              -v redis-data:/data \
-              redis:7-alpine redis-server --appendonly yes
-
-            # 4) 컨테이너 실행: .env로 FIREBASE_SERVICE_ACCOUNT_PATH까지 주입됨
-            docker run -d --name "$CONTAINER_NAME" \
-              --restart unless-stopped \
-              --network dorumdorum-net \
-              -p 8080:8080 \
-              --env-file .env \
-              -e REDIS_HOST=dorumdorum-redis \
-              -e REDIS_PORT=6379 \
-              -v "$DEPLOY_PATH/firebase-service-account.json:/app/firebase-service-account.json:ro" \
-              "$IMAGE"
-
+            # Prepare the unified docker-compose file and its configuration files
+            COMPOSE_PATH="${HOME}/dorumdorum-compose"
+            mkdir -p "$COMPOSE_PATH"
+            cd "$COMPOSE_PATH"
+            echo ">>> [Deploy] COMPOSE_PATH=$COMPOSE_PATH, Git clone 중..."
+            rm -rf temp_repo
+
+            # Fetch docker-compose.yml and monitoring/ from the production branch
+            git clone --depth 1 -b production "https://x-access-token:${GITHUB_TOKEN}@github.com/${REPO_FULL_NAME}.git" temp_repo
+            echo ">>> [Deploy] Clone 완료, docker-compose.yml · monitoring 복사 중..."
+
+            if [ -f "temp_repo/docker-compose.yml" ]; then
+              cp temp_repo/docker-compose.yml .
+              echo ">>> [Deploy] docker-compose.yml 복사 완료"
+            else
+              echo "ERROR: docker-compose.yml not found in temp_repo/"
+              ls -la temp_repo/ || true
+              exit 1
+            fi
+
+            if [ -d "temp_repo/monitoring" ]; then
+              # Replace the directory wholesale so files deleted from the repo do not linger on the VM.
+              rm -rf monitoring
+              cp -r temp_repo/monitoring monitoring
+              echo ">>> [Deploy] monitoring 설정 파일 복사 완료"
+            else
+              # docker-compose.yml bind-mounts files under monitoring/; stop before tearing down running containers.
+              echo "ERROR: monitoring directory not found in temp_repo/"
+              exit 1
+            fi
+
+            rm -rf temp_repo
+            echo ">>> [Deploy] temp_repo 삭제, compose 파일 확인 중..."
+
+            # Verify docker-compose.yml exists
+            if [ ! -f "docker-compose.yml" ]; then
+              echo "ERROR: docker-compose.yml not found after copy"
+              ls -la
+              exit 1
+            fi
+
+            # Copy .env and firebase-service-account.json
+            cp "$DEPLOY_PATH/.env" .
+            cp "$DEPLOY_PATH/firebase-service-account.json" .
+            echo ">>> [Deploy] .env, firebase-service-account.json 복사 완료"
+
+            # Start all services with docker-compose
+            export BACKEND_IMAGE="$IMAGE"
+            export CONTAINER_NAME="$CONTAINER_NAME"
+            echo ">>> [Deploy] 기존 컨테이너 정리 (down + 이름으로 강제 제거)..."
+            docker-compose -f docker-compose.yml down --remove-orphans >/dev/null 2>&1 || true
+            docker rm -f dorumdorum-redis "$CONTAINER_NAME" dorumdorum-prometheus dorumdorum-grafana 2>/dev/null || true
+            echo ">>> [Deploy] docker-compose up -d 실행 중..."
+            docker-compose -f docker-compose.yml up -d
+
+            echo ">>> [Deploy] 오래된 이미지 정리 중..."
             docker image prune -af --filter "until=168h"
+
+            echo ">>> [Deploy] 배포 완료"
+            echo "- Backend: http://localhost:8080"
+            echo "- Redis: localhost:6379"
+            echo "- Prometheus: http://localhost:9090"
+            echo "- Grafana: http://localhost:3000"
diff --git a/be.env.example b/be.env.example
deleted file mode 100644
index 992dc4c..0000000
--- a/be.env.example
+++ /dev/null
@@ -1,28 +0,0 @@
-SPRING_PROFILES_ACTIVE=prod
-
-# MySQL
-RDB_USERNAME=your_db_user
-RDB_URL=jdbc:mysql://your-db-host:3306/your-db-name
-RDB_PASSWORD=your_db_password
-
-# Redis (overridden by deploy script to dorumdorum-redis:6379)
-REDIS_HOST=localhost
-REDIS_PORT=6379
-
-# MongoDB
-MONGODB_URI=mongodb+srv://user:password@cluster-url/database?retryWrites=true&w=majority
-
-# JWT (seconds)
-JWT_KEY=your_jwt_secret_key
-JWT_ACCESS_EXPIRATION=900
-JWT_REFRESH_EXPIRATION=1209600
-
-# SMTP
-SMTP_USERNAME=your_smtp_username
-SMTP_PASSWORD=your_smtp_password
-
-# Firebase
-FIREBASE_SERVICE_ACCOUNT_PATH=/app/firebase-service-account.json
-
-# Optional JVM options
-JAVA_OPTS=-Xms256m -Xmx512m
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..c64fb03
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,82 @@
+# Production stack: Redis, backend, Prometheus and Grafana on the external dorumdorum-net network.
+# BACKEND_IMAGE and CONTAINER_NAME are exported by the deploy script; the defaults below apply otherwise.
+services:
+  redis:
+    image: redis:7-alpine
+    container_name: dorumdorum-redis
+    ports:
+      # Loopback only: Redis runs without authentication; the backend reaches it over dorumdorum-net.
+      - "127.0.0.1:6379:6379"
+    volumes:
+      - redis_data:/data
+    command: redis-server --appendonly yes
+    restart: unless-stopped
+    networks:
+      - dorumdorum-net
+
+  backend:
+    image: ${BACKEND_IMAGE:-koungq/dorumdorum-be:latest}
+    container_name: ${CONTAINER_NAME:-dorumdorum-be}
+    ports:
+      - "8080:8080"
+    env_file:
+      - .env
+    environment:
+      - REDIS_HOST=redis
+      - REDIS_PORT=6379
+    volumes:
+      - ./firebase-service-account.json:/app/firebase-service-account.json:ro
+    depends_on:
+      - redis
+    restart: unless-stopped
+    networks:
+      - dorumdorum-net
+
+  prometheus:
+    image: prom/prometheus:v2.52.0
+    container_name: dorumdorum-prometheus
+    ports:
+      # Loopback only: --web.enable-lifecycle exposes unauthenticated reload/quit endpoints.
+      - "127.0.0.1:9090:9090"
+    volumes:
+      - ./monitoring/prometheus/prometheus.prod.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus_data:/prometheus
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.path=/prometheus"
+      - "--web.enable-lifecycle"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: unless-stopped
+    networks:
+      - dorumdorum-net
+
+  grafana:
+    image: grafana/grafana:11.2.0
+    container_name: dorumdorum-grafana
+    ports:
+      - "3000:3000"
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      # Override with GRAFANA_ADMIN_PASSWORD; falls back to the previous default when unset.
+      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
+      - GF_USERS_ALLOW_SIGN_UP=false
+      - GF_SERVER_ROOT_URL=http://localhost:3000
+      - GF_INSTALL_PLUGINS=grafana-piechart-panel
+    volumes:
+      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
+      - grafana_data:/var/lib/grafana
+    depends_on:
+      - prometheus
+    restart: unless-stopped
+    networks:
+      - dorumdorum-net
+
+volumes:
+  redis_data:
+  prometheus_data:
+  grafana_data:
+
+networks:
+  dorumdorum-net:
+    external: true
diff --git a/monitoring/grafana/provisioning/dashboards/dashboards.yml b/monitoring/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 0000000..b4631f1
--- /dev/null
+++ b/monitoring/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,12 @@
+apiVersion: 1
+providers:
+  - name: "dorumdorum"
+    orgId: 1
+    folder: "Dorumdorum"
+    folderUid: "dorumdorum"
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 30
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards/json
diff --git a/monitoring/grafana/provisioning/dashboards/json/concurrency-capacity-dashboard.json b/monitoring/grafana/provisioning/dashboards/json/concurrency-capacity-dashboard.json
new file mode 100644
index 0000000..81e5374
--- /dev/null
+++ b/monitoring/grafana/provisioning/dashboards/json/concurrency-capacity-dashboard.json
@@ -0,0 +1,410 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[1m]))",
+          "legendFormat": "RPS",
+          "refId": "A"
+        }
+      ],
+      "title": "API Request Rate (RPS)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le))",
+          "legendFormat": "p50",
+          "refId": "A"
+        },
+        {
+          "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le))",
+          "legendFormat": "p95",
+          "refId": "B"
+        },
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le))",
+          "legendFormat": "p99",
+          "refId": "C"
+        }
+      ],
+      "title": "API Latency (p50/p95/p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 7
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\",status=~\"4..\"}[5m])) / sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[5m])) or vector(0)",
+          "legendFormat": "4xx error rate",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\",status=~\"5..\"}[5m])) / sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[5m])) or vector(0)",
+          "legendFormat": "5xx error rate",
+          "refId": "B"
+        }
+      ],
+      "title": "API Error Rate (4xx/5xx)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 7
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "sum(hikaricp_connections_active{application=\"dorumdorum\"})",
+          "legendFormat": "active",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(hikaricp_connections_pending{application=\"dorumdorum\"})",
+          "legendFormat": "pending",
+          "refId": "B"
+        },
+        {
+          "expr": "sum(increase(hikaricp_connections_timeout_total{application=\"dorumdorum\"}[5m]))",
+          "legendFormat": "timeout (5m increase)",
+          "refId": "C"
+        }
+      ],
+      "title": "HikariCP (active/pending/timeout)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 14
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "jvm_memory_used_bytes{area=\"heap\",application=\"dorumdorum\"}",
+          "legendFormat": "heap used",
+          "refId": "A"
+        },
+        {
+          "expr": "jvm_memory_max_bytes{area=\"heap\",application=\"dorumdorum\"}",
+          "legendFormat": "heap max",
+          "refId": "B"
+        }
+      ],
+      "title": "JVM Heap Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 14
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "rate(jvm_gc_pause_seconds_sum{application=\"dorumdorum\"}[5m])",
+          "legendFormat": "GC pause seconds/sec",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(jvm_threads_state{application=\"dorumdorum\"})",
+          "legendFormat": "thread count",
+          "refId": "B"
+        }
+      ],
+      "title": "JVM GC Pause / Thread Count",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 21
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "increase(mysql_global_status_slow_queries[5m])",
+          "legendFormat": "slow queries (5m increase)",
+          "refId": "A"
+        }
+      ],
+      "title": "MySQL Slow Query Count",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 21
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "increase(mysql_global_status_innodb_row_lock_time[5m])",
+          "legendFormat": "lock wait time (5m increase)",
+          "refId": "A"
+        }
+      ],
+      "title": "MySQL Lock Wait Time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 21
+      },
+      "id": 9,
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        }
+      },
+      "targets": [
+        {
+          "expr": "increase(mysql_global_status_innodb_deadlocks[5m])",
+          "legendFormat": "deadlocks (5m increase)",
+          "refId": "A"
+        }
+      ],
+      "title": "MySQL Deadlock Count",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 39,
+  "tags": [
+    "dorumdorum",
+    "concurrency",
+    "capacity"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Dorumdorum Concurrency & Capacity",
+  "uid": "dorumdorum-concurrency-capacity",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/monitoring/grafana/provisioning/dashboards/json/dorumdorum-overview.json b/monitoring/grafana/provisioning/dashboards/json/dorumdorum-overview.json
new file mode 100644
index 0000000..f15e628
--- /dev/null
+++ b/monitoring/grafana/provisioning/dashboards/json/dorumdorum-overview.json
@@ -0,0 +1,132 @@
+{
+  "annotations": {"list": []},
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}, "overrides": []},
+      "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
+      "id": 101,
+      "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
+      "targets": [{"expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[1m]))", "legendFormat": "RPS", "refId": "A"}],
+      "title": "📊 RPS (Throughput)",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 3}]}, "unit": "s"}, "overrides": []},
+      "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
+      "id": 102,
+      "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
+      "targets": [{"expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le))", "legendFormat": "p95", "refId": "A"}],
+      "title": "⏱️ Latency p95",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 3}]}, "unit": "s"}, "overrides": []},
+      "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
+      "id": 103,
+      "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
+      "targets": [{"expr": "histogram_quantile(0.99, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le))", "legendFormat": "p99", "refId": "A"}],
+      "title": "⏱️ Latency p99",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.01}, {"color": "red", "value": 0.05}]}, "unit": "percentunit"}, "overrides": []},
+      "gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
+      "id": 104,
+      "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
+      "targets": [{"expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\",status=~\"5..\"}[5m])) / sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[5m])) or vector(0)", "legendFormat": "Error %", "refId": "A"}],
+      "title": "❌ Error Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "reqps"}},
+      "gridPos": {"h": 7, "w": 12, "x": 0, "y": 4},
+      "id": 1,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[5m])) by (uri, method)", "legendFormat": "{{method}} {{uri}}"}],
+      "title": "HTTP Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "s"}},
+      "gridPos": {"h": 7, "w": 12, "x": 12, "y": 4},
+      "id": 2,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le, uri))", "legendFormat": "p50 {{uri}}"}, {"expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le, uri))", "legendFormat": "p95 {{uri}}"}, {"expr": "histogram_quantile(0.99, sum(rate(http_server_requests_seconds_bucket{application=\"dorumdorum\"}[5m])) by (le, uri))", "legendFormat": "p99 {{uri}}"}],
+      "title": "HTTP Latency (p50/p95/p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "bytes"}},
+      "gridPos": {"h": 7, "w": 12, "x": 0, "y": 11},
+      "id": 3,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "jvm_memory_used_bytes{area=\"heap\",application=\"dorumdorum\"}", "legendFormat": "Used"}, {"expr": "jvm_memory_max_bytes{area=\"heap\",application=\"dorumdorum\"}", "legendFormat": "Max"}],
+      "title": "JVM Heap Memory",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "max": 1, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "red", "value": 0.9}]}, "unit": "percentunit"}},
+      "gridPos": {"h": 7, "w": 6, "x": 12, "y": 11},
+      "id": 4,
+      "options": {"minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
+      "targets": [{"expr": "jvm_memory_used_bytes{area=\"heap\",application=\"dorumdorum\"} / jvm_memory_max_bytes{area=\"heap\",application=\"dorumdorum\"}", "legendFormat": "Heap Usage"}],
+      "title": "Heap Usage %",
+      "type": "gauge"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "short"}},
+      "gridPos": {"h": 7, "w": 6, "x": 18, "y": 11},
+      "id": 5,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "jvm_threads_state{application=\"dorumdorum\"}", "legendFormat": "{{state}}"}],
+      "title": "JVM Threads",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "s"}},
+      "gridPos": {"h": 7, "w": 12, "x": 0, "y": 18},
+      "id": 6,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "rate(jvm_gc_pause_seconds_sum{application=\"dorumdorum\"}[5m])", "legendFormat": "{{action}} - {{gc}}"}],
+      "title": "GC Pause Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "unit": "reqps"}},
+      "gridPos": {"h": 7, "w": 12, "x": 12, "y": 18},
+      "id": 7,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}},
+      "targets": [{"expr": "sum(rate(http_server_requests_seconds_count{application=\"dorumdorum\"}[5m])) by (status)", "legendFormat": "{{status}}"}],
+      "title": "HTTP Status / 5xx Errors",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 39,
+  "tags": ["dorumdorum", "performance", "overview"],
+  "templating": {"list": []},
+  "time": {"from": "now-1h", "to": "now"},
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Dorumdorum Overview",
+  "uid": "dorumdorum-overview",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/monitoring/grafana/provisioning/datasources/datasources.yml b/monitoring/grafana/provisioning/datasources/datasources.yml
new file mode 100644
index 0000000..433ef5c
--- /dev/null
+++ b/monitoring/grafana/provisioning/datasources/datasources.yml
@@ -0,0 +1,9 @@
+apiVersion: 1
+datasources:
+  - name: Prometheus
+    uid: prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
diff --git a/monitoring/prometheus/prometheus.prod.yml b/monitoring/prometheus/prometheus.prod.yml
new file mode 100644
index 0000000..c8f5236
--- /dev/null
+++ b/monitoring/prometheus/prometheus.prod.yml
@@ -0,0 +1,13 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: "dorumdorum"
+    metrics_path: /actuator/prometheus
+    static_configs:
+      - targets: ["host.docker.internal:8080"]
+        labels:
+          application: dorumdorum
+          env: prod
+