Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion backend/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ __pycache__/

# logging
*.log
logs

# security
*.pem

# loki
chunks
compactor
rules
tsdb-shipper-active
tsdb-shipper-cache
wal
2 changes: 1 addition & 1 deletion backend/alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
DATABASE_URL = get_env("DATABASE_URL")

if not DATABASE_URL:
raise ValueError(f"No database URL configured for the {get_env("ENVIRONMENT")} environment.")
raise ValueError(f"No database URL configured for the {get_env('ENVIRONMENT')} environment.")

config = context.config
config.set_main_option("sqlalchemy.url", DATABASE_URL)
Expand Down
12 changes: 12 additions & 0 deletions backend/app/db/database.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncpg
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Expand Down Expand Up @@ -33,3 +34,14 @@ async def create_db_session():

async def close_db_session(session):
await session.close()


async def get_connection():
    """Open a direct asyncpg connection to the PostgreSQL database and return it.

    The SQLAlchemy-style DSN (``postgresql+asyncpg://...``) is rewritten to a
    plain ``postgresql://...`` URL because asyncpg does not understand the
    dialect suffix. The caller is responsible for closing the connection
    (see ``close_connection``).
    """
    conn = await asyncpg.connect(DATABASE_URL.replace('postgresql+asyncpg', 'postgresql'))
    return conn


async def close_connection(conn):
    """Close a PostgreSQL connection previously returned by ``get_connection``."""
    await conn.close()
2 changes: 2 additions & 0 deletions backend/app/llm/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from langchain_text_splitters import RecursiveCharacterTextSplitter

from app.llm.message_task import store
from app.utils.logging import logger


def get_rag_chain(document_text: str):
Expand Down Expand Up @@ -115,6 +116,7 @@ def get_langchain_response(user_message: str, chat_room_id: int):
)

except Exception as e:
logger.error("Error generating answer: ", exc_info={e})
raise RuntimeError(f"Error generating answer: {e}")


Expand Down
37 changes: 35 additions & 2 deletions backend/app/main.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import time
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from app.utils.logging import log_exception
from app.routers import auth, me, folders, chatrooms, messages, links, rating

from app.routers import auth, me, folders, chatrooms, messages, links, rating, metrics
from app.metrics.prometheus_metrics import REQUEST_COUNT, REQUEST_DURATION, RESPONSE_STATUS
from prometheus_fastapi_instrumentator import Instrumentator

@asynccontextmanager
async def lifespan(app: FastAPI):
Expand All @@ -17,6 +19,7 @@ async def lifespan(app: FastAPI):

origins = [
"http://localhost:3000",
"http://localhost:3001",
]

app.add_middleware(
Expand All @@ -27,6 +30,35 @@ async def lifespan(app: FastAPI):
allow_headers=["*"],
)

instrumentator = Instrumentator().instrument(app)
instrumentator.expose(app)

@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Record Prometheus metrics for every HTTP request.

    Measures wall-clock processing time, increments the request counter,
    observes the duration histogram, counts the response status code, and
    exposes the elapsed time to clients via the ``X-Process-Time`` header.
    """
    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time

    # Collect the label values once (the original assigned method/path twice).
    method = request.method
    path = request.url.path
    status_code = response.status_code

    REQUEST_COUNT.labels(method=method, path=path).inc()
    REQUEST_DURATION.labels(method=method, path=path).observe(process_time)
    RESPONSE_STATUS.labels(method=method, path=path, status_code=status_code).inc()

    # Surface the processing time for client-side debugging (set once, not twice).
    response.headers["X-Process-Time"] = str(process_time)

    return response


@app.exception_handler(HTTPException)
async def custom_http_exception_handler(req: Request, exc: HTTPException):
Expand All @@ -46,3 +78,4 @@ async def custom_http_exception_handler(req: Request, exc: HTTPException):
app.include_router(messages.router)
app.include_router(links.router)
app.include_router(rating.router)
app.include_router(metrics.router)
108 changes: 108 additions & 0 deletions backend/app/metrics/pg_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from prometheus_client import Gauge
from app.db.database import get_connection, close_connection
from app.metrics.prometheus_metrics import *

async def record_pg_metrics():
    """Collect PostgreSQL server statistics and publish them as Prometheus gauges.

    Opens a dedicated asyncpg connection, queries the ``pg_stat_*`` /
    ``pg_statio_*`` statistics views, and updates the module-level gauges
    from ``app.metrics.prometheus_metrics``. The connection is always
    closed, even when a query raises.
    """
    conn = await get_connection()
    try:
        # Total committed / rolled-back transactions for the current database.
        commit_result = await conn.fetchval("SELECT xact_commit FROM pg_stat_database WHERE datname = current_database();")
        COMMIT_COUNT.set(commit_result)

        rollback_result = await conn.fetchval("SELECT xact_rollback FROM pg_stat_database WHERE datname = current_database();")
        ROLLBACK_COUNT.set(rollback_result)

        # Number of backends currently executing a query.
        active_conn_result = await conn.fetchval("SELECT count(*) FROM pg_stat_activity WHERE state = 'active';")
        ACTIVE_CONNECTIONS.set(active_conn_result)

        # Overall buffer-cache hit ratio, in percent. 100.0 forces float
        # division — the original's integer division truncated the ratio.
        cache_hit_result = await conn.fetchval("""
            SELECT
                CASE
                    WHEN SUM(heap_blks_hit + heap_blks_read) = 0 THEN 0
                    ELSE 100.0 * SUM(heap_blks_hit) / SUM(heap_blks_hit + heap_blks_read)
                END AS overall_cache_hit_ratio
            FROM pg_statio_user_tables
            WHERE (heap_blks_hit + heap_blks_read) > 0;
        """)
        # fetchval returns None when no user table has any block activity.
        TOTAL_CACHE_HIT_RATIO.set(cache_hit_result if cache_hit_result is not None else 0)

        # Per-table cache hit ratio (percent, float division as above).
        cache_hit_results = await conn.fetch("""
            SELECT relname,
                heap_blks_hit,
                heap_blks_read,
                CASE
                    WHEN (heap_blks_hit + heap_blks_read) = 0 THEN 0
                    ELSE 100.0 * heap_blks_hit / (heap_blks_hit + heap_blks_read)
                END AS cache_hit_ratio
            FROM pg_statio_user_tables;
        """)

        for result in cache_hit_results:
            table_name = result['relname']
            cache_hit_ratio = result['cache_hit_ratio']

            # Lazily create one gauge per table and cache it in the registry.
            # NOTE(review): assumes relname contains only characters valid in
            # a Prometheus metric name — confirm for unusually named tables.
            if table_name not in CACHE_HIT_RATIO_METRICS:
                CACHE_HIT_RATIO_METRICS[table_name] = Gauge(f'cache_hit_ratio_{table_name}', f'Cache hit ratio for {table_name} table')

            CACHE_HIT_RATIO_METRICS[table_name].set(cache_hit_ratio)

        # Fraction of scans served by indexes. The ::float cast is required:
        # bigint / bigint is integer division in PostgreSQL, so the original
        # query could only ever yield 0 or 1.
        index_scan_result = await conn.fetchval("""
            SELECT CASE
                WHEN (sum(seq_scan) + sum(idx_scan)) = 0 THEN 0
                ELSE sum(idx_scan)::float / (sum(seq_scan) + sum(idx_scan))
            END AS index_scan_ratio
            FROM pg_stat_user_tables;
        """)

        # sum() over zero rows yields NULL -> None; guard before setting
        # (the original comment claimed this but did not actually do it).
        INDEX_SCAN_RATIO.set(index_scan_result if index_scan_result is not None else 0)

        # Row-level throughput counters for the current database.
        rates_result = await conn.fetchrow("""
            SELECT tup_fetched, tup_inserted, tup_updated, tup_deleted
            FROM pg_stat_database WHERE datname = current_database();
        """)
        FETCH_RATE.set(rates_result['tup_fetched'])
        INSERT_RATE.set(rates_result['tup_inserted'])
        UPDATE_RATE.set(rates_result['tup_updated'])
        DELETE_RATE.set(rates_result['tup_deleted'])

        # Deadlock count.
        deadlock_result = await conn.fetchval("""
            SELECT deadlocks FROM pg_stat_database WHERE datname = current_database();
        """)
        DEADLOCK_COUNT.set(deadlock_result)

        # Replication lag in bytes (first standby only, if any).
        replication_lag_result = await conn.fetchval("""
            SELECT pg_current_wal_lsn() - replay_lsn AS replication_lag_bytes
            FROM pg_stat_replication;
        """)

        # No rows (no standby connected) -> None; report 0 in that case.
        REPLICATION_LAG_BYTES.set(replication_lag_result if replication_lag_result is not None else 0)

        # Block reads served from disk vs. from the shared buffer cache.
        disk_result = await conn.fetchval("SELECT blks_read FROM pg_stat_database WHERE datname = current_database();")
        READ_DISK.set(disk_result)

        cache_result = await conn.fetchval("SELECT blks_hit FROM pg_stat_database WHERE datname = current_database();")
        READ_CACHE.set(cache_result)

    finally:
        await close_connection(conn)
23 changes: 23 additions & 0 deletions backend/app/metrics/prometheus_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from prometheus_client import Counter, Histogram, Gauge

# Prometheus metric definitions (backend HTTP layer) — labelled by request
# method and URL path; status counter additionally carries the status code.
REQUEST_COUNT = Counter("request_count", "Total request count", ["method", "path"])
REQUEST_DURATION = Histogram("request_duration_seconds", "Request duration", ["method", "path"])
RESPONSE_STATUS = Counter('http_response_status', 'HTTP Response Status Codes', ['method', 'path', 'status_code'])

# Prometheus metric definitions (PostgreSQL) — populated by
# app.metrics.pg_metrics.record_pg_metrics from the pg_stat_* views.
ACTIVE_CONNECTIONS = Gauge('pg_active_connections', 'Number of active connections')
TOTAL_CACHE_HIT_RATIO = Gauge('pg_total_cache_hit_ratio', 'Total cache hit ratio')
INDEX_SCAN_RATIO = Gauge('pg_index_scan_ratio', 'Index scan ratio')
FETCH_RATE = Gauge('pg_fetch_rate', 'Fetch throughput')
INSERT_RATE = Gauge('pg_insert_rate', 'Insert throughput')
UPDATE_RATE = Gauge('pg_update_rate', 'Update throughput')
DELETE_RATE = Gauge('pg_delete_rate', 'Delete throughput')
DEADLOCK_COUNT = Gauge('pg_deadlock_count', 'Number of deadlocks')
REPLICATION_LAG_BYTES = Gauge('pg_replication_lag_bytes', 'Replication lag in bytes')
COMMIT_COUNT = Gauge('pg_stat_database_xact_commit', 'Number of commit transactions')
ROLLBACK_COUNT = Gauge('pg_stat_database_xact_rollback', 'Number of rollback transactions')
READ_DISK = Gauge('pg_stat_database_blks_read', 'Number of blocks read from disk')
READ_CACHE = Gauge('pg_stat_database_blks_hit', 'Number of blocks read from cache')

# Registry of per-table cache-hit-ratio gauges, keyed by table name.
# Gauges are created lazily (Prometheus forbids registering the same
# metric name twice, so they must be cached across collection runs).
CACHE_HIT_RATIO_METRICS = {}
Loading