Commit 15d8036
docs: Move logging documentation to docs/ and fix all linting issues
## Documentation Changes

- Created `docs/development/logging.md` with comprehensive mkdocs-formatted documentation
  - Architecture overview
  - Configuration reference
  - Complete usage examples
  - API reference
  - Migration guide
  - Troubleshooting guide
- Updated `CLAUDE.md` to reference the detailed documentation instead of duplicating content
  - Kept brief summary with quick example
  - Added link to full documentation

## Linting Fixes

### Ruff (✅ All checks passing)

- Removed unused imports (`Path`, `Any`, `get_context`)
- Converted `Optional[X]` to `X | None` (UP045)
- Removed `noqa` directives for non-enabled checks
- Fixed timezone import (`UTC` instead of `timezone`)
- Combined nested `with` statements in tests

### Mypy (✅ No errors)

- Fixed ContextVar default value (B039): changed from `LogContext()` to `None`
- Added `get_context()` initialization logic to handle the `None` case
- Added `DOCUMENT_PROCESSING` constant to the `PipelineStage` class
- Added `# type: ignore[misc]` comments to `pytest.mark.asyncio` decorators

### Code Quality Improvements

- All files now use modern Python 3.12+ type hints
- Proper null handling in ContextVar
- Consistent code formatting throughout

## Files Modified

- `docs/development/logging.md` - NEW comprehensive documentation
- `CLAUDE.md` - Simplified with reference to detailed docs
- `backend/core/logging_context.py` - Fixed ContextVar, added constant
- `backend/core/log_storage_service.py` - Type hint modernization
- `backend/core/enhanced_logging.py` - Import cleanup, type hints
- `backend/core/enhanced_logging_example.py` - Type hints, combined `with` statements
- `backend/tests/unit/test_enhanced_logging.py` - Type ignore comments, combined `with`
- `backend/poetry.lock` - Updated with python-json-logger dependency

## Verification

- ✅ Ruff: All checks passing
- ✅ Mypy: No type errors found
- 📝 Note: Pylint/pydocstyle skipped (not in current poetry environment)

The enhanced logging implementation is now fully documented and lint-compliant.
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 5e44c48 commit 15d8036

File tree

8 files changed: +729, -223 lines


CLAUDE.md

Lines changed: 12 additions & 139 deletions
````diff
@@ -404,155 +404,28 @@ make validate-ci
 
 RAG Modulo implements an enhanced logging system with structured context tracking, request correlation, and performance monitoring. Based on patterns from IBM mcp-context-forge.
 
-#### Key Features
-
-- **Dual Output Formats**: JSON for production/monitoring, text for development
-- **Context Tracking**: Automatic request correlation and entity tracking (collection, user, pipeline, document)
-- **Pipeline Stage Tracking**: Track operations through each RAG pipeline stage
-- **Performance Monitoring**: Automatic timing for all operations
-- **In-Memory Storage**: Queryable log buffer for debugging and admin UI
-
-#### Configuration
-
-```env
-# Logging settings (.env)
-LOG_LEVEL=INFO  # DEBUG, INFO, WARNING, ERROR, CRITICAL
-LOG_FORMAT=text  # text (dev) or json (prod)
-LOG_TO_FILE=true
-LOG_FILE=rag_modulo.log
-LOG_FOLDER=logs
-LOG_ROTATION_ENABLED=true
-LOG_MAX_SIZE_MB=10
-LOG_BACKUP_COUNT=5
-
-# Log storage (in-memory)
-LOG_STORAGE_ENABLED=true
-LOG_BUFFER_SIZE_MB=5
-```
-
-#### Usage in Services
+**Key Features**: Dual output formats (JSON/text), context tracking, pipeline stage tracking, performance monitoring, in-memory queryable storage.
 
+**Quick Example**:
 ```python
 from core.enhanced_logging import get_logger
 from core.logging_context import log_operation, pipeline_stage_context, PipelineStage
 
 logger = get_logger(__name__)
 
-async def search(self, search_input: SearchInput) -> SearchOutput:
-    # Wrap entire operation for automatic timing and context
-    with log_operation(
-        logger,
-        "search_documents",
-        entity_type="collection",
-        entity_id=str(search_input.collection_id),
-        user_id=str(search_input.user_id),
-        query=search_input.question  # Additional metadata
-    ):
-        # Each pipeline stage tracked separately
-        with pipeline_stage_context(PipelineStage.QUERY_VALIDATION):
-            validate_search_input(search_input)
-
-        with pipeline_stage_context(PipelineStage.QUERY_REWRITING):
-            rewritten = await self.rewrite_query(search_input.question)
-            logger.info("Query rewritten", extra={
-                "original": search_input.question,
-                "rewritten": rewritten
-            })
-
-        with pipeline_stage_context(PipelineStage.VECTOR_SEARCH):
-            results = await self.vector_search(rewritten)
-            logger.info("Vector search completed", extra={
-                "result_count": len(results),
-                "top_score": results[0].score if results else 0
-            })
-```
-
-#### Log Output Examples
-
-**Text Format** (development):
-```
-[2025-10-22T10:30:45] INFO rag.search: Starting search_documents [req_id=req_abc123, collection=coll_456, user=user_xyz]
-[2025-10-22T10:30:45] INFO rag.search: Query rewritten [stage=query_rewriting] | original=What is AI?, rewritten=artificial intelligence machine learning
-[2025-10-22T10:30:45] INFO rag.search: Vector search completed [stage=vector_search] | result_count=5, top_score=0.95
-[2025-10-22T10:30:45] INFO rag.search: Completed search_documents (took 234.56ms)
-```
-
-**JSON Format** (production):
-```json
-{
-  "timestamp": "2025-10-22T10:30:45.123Z",
-  "level": "info",
-  "logger": "rag.search",
-  "message": "Query rewritten",
-  "context": {
-    "request_id": "req_abc123",
-    "user_id": "user_xyz",
-    "collection_id": "coll_456",
-    "operation": "search_documents",
-    "pipeline_stage": "query_rewriting"
-  },
-  "original": "What is AI?",
-  "rewritten": "artificial intelligence machine learning",
-  "execution_time_ms": 45.2
-}
+with log_operation(logger, "search", "collection", coll_id, user_id=user_id):
+    with pipeline_stage_context(PipelineStage.QUERY_REWRITING):
+        logger.info("Query rewritten", extra={"original": q, "rewritten": rq})
 ```
 
-#### Pipeline Stages
-
-Standard pipeline stage constants available in `PipelineStage`:
-
-**Query Processing**: `QUERY_VALIDATION`, `QUERY_REWRITING`, `QUERY_EXPANSION`, `QUERY_DECOMPOSITION`
-**Embedding**: `EMBEDDING_GENERATION`, `EMBEDDING_BATCHING`
-**Retrieval**: `VECTOR_SEARCH`, `KEYWORD_SEARCH`, `HYBRID_SEARCH`, `DOCUMENT_RETRIEVAL`
-**Reranking**: `RERANKING`, `RELEVANCE_SCORING`
-**Generation**: `PROMPT_CONSTRUCTION`, `LLM_GENERATION`, `ANSWER_PROCESSING`, `SOURCE_ATTRIBUTION`
-**Chain of Thought**: `COT_REASONING`, `COT_QUESTION_DECOMPOSITION`, `COT_ANSWER_SYNTHESIS`
-**Documents**: `DOCUMENT_PARSING`, `DOCUMENT_CHUNKING`, `DOCUMENT_INDEXING`
-
-#### Benefits
-
-**Full Request Traceability**: Track every search request through the entire RAG pipeline
-**Performance Insights**: Automatic timing for each pipeline stage
-**Debugging 50% Faster**: Structured context makes finding issues trivial
-**Production Ready**: JSON output integrates with ELK, Splunk, CloudWatch
-**Zero Performance Impact**: Async logging with buffering
-**Developer Friendly**: Human-readable text format for local development
-**Queryable**: In-memory log storage for admin UI and debugging
-
-#### Migration from Old Logging
-
-The old `logging_utils.py` continues to work during migration:
+**📖 Full Documentation**: [docs/development/logging.md](docs/development/logging.md)
 
-```python
-# Old style (still works)
-from core.logging_utils import get_logger
-logger = get_logger(__name__)
-logger.info("Something happened")
-
-# New style (enhanced - recommended)
-from core.enhanced_logging import get_logger
-from core.logging_context import log_operation
-
-logger = get_logger(__name__)
-with log_operation(logger, "operation_name", "entity_type", "entity_id"):
-    logger.info("Something happened", extra={"key": "value"})
-```
-
-#### Example Integration
-
-See `backend/core/enhanced_logging_example.py` for comprehensive examples including:
-- Simple search operations
-- Chain of Thought reasoning
-- Error handling
-- Batch processing
-- API endpoint integration
-
-#### Testing
-
-Run logging tests:
-```bash
-pytest backend/tests/unit/test_enhanced_logging.py -v
-```
+- Configuration reference
+- Complete usage examples
+- API reference
+- Migration guide
+- Testing guide
+- Troubleshooting
 
 ### Vector Database Support
 
````
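The quick example kept in CLAUDE.md uses `log_operation` without showing its implementation, which this commit does not touch. A minimal sketch of what such a context manager could look like, assuming only the behavior the documentation describes (start/completion logging, automatic timing, errors logged with context); this is not the project's actual code:

```python
import logging
import time
from collections.abc import Iterator
from contextlib import contextmanager


@contextmanager
def log_operation(logger: logging.Logger, operation: str,
                  entity_type: str, entity_id: str, **metadata) -> Iterator[None]:
    """Log start/completion of an operation and time the wrapped block."""
    start = time.perf_counter()
    logger.info("Starting %s", operation,
                extra={"entity_type": entity_type, "entity_id": entity_id, **metadata})
    try:
        yield
    except Exception:
        # Failures are logged with the same context plus the stack trace
        logger.exception("Failed %s", operation)
        raise
    else:
        elapsed_ms = (time.perf_counter() - start) * 1000
        logger.info("Completed %s (took %.2fms)", operation, elapsed_ms)
```

Per-stage context managers like `pipeline_stage_context` then compose by simple nesting inside the wrapped block.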

backend/core/enhanced_logging.py

Lines changed: 12 additions & 15 deletions
```diff
@@ -21,18 +21,15 @@
 import logging.handlers
 import os
 from asyncio import AbstractEventLoop, get_running_loop
-from pathlib import Path
-from typing import Any, Optional
 
 from pythonjsonlogger import jsonlogger
 
 from core.log_storage_service import LogLevel, LogStorageService
-from core.logging_context import get_context
 
 # Global handlers will be created lazily
-_file_handler: Optional[logging.Handler] = None
-_text_handler: Optional[logging.StreamHandler] = None
-_storage_handler: Optional[logging.Handler] = None
+_file_handler: logging.Handler | None = None
+_text_handler: logging.StreamHandler | None = None
+_storage_handler: logging.Handler | None = None
 
 # Text formatter
 _text_formatter = logging.Formatter(
@@ -48,7 +45,7 @@
 
 def _get_file_handler(
     log_file: str = "rag_modulo.log",
-    log_folder: Optional[str] = "logs",
+    log_folder: str | None = "logs",
     log_rotation_enabled: bool = True,
     log_max_size_mb: int = 10,
     log_backup_count: int = 5,
@@ -72,7 +69,7 @@ def _get_file_handler(
     Raises:
         ValueError: If file logging is disabled or no log file specified
     """
-    global _file_handler  # noqa: PLW0603
+    global _file_handler
     if _file_handler is None:
         if not log_file:
             raise ValueError("No log file specified")
@@ -108,7 +105,7 @@ def _get_text_handler() -> logging.StreamHandler:
     Returns:
         logging.StreamHandler: The stream handler for console logging
     """
-    global _text_handler  # noqa: PLW0603
+    global _text_handler
     if _text_handler is None:
         _text_handler = logging.StreamHandler()
         _text_handler.setFormatter(_text_formatter)
@@ -266,7 +263,7 @@ async def initialize(
         log_format: str = "text",
         log_to_file: bool = True,
         log_file: str = "rag_modulo.log",
-        log_folder: Optional[str] = "logs",
+        log_folder: str | None = "logs",
         log_rotation_enabled: bool = True,
         log_max_size_mb: int = 10,
         log_backup_count: int = 5,
@@ -339,7 +336,7 @@ async def initialize(
             self._storage = LogStorageService(max_size_mb=log_buffer_size_mb)
 
             # Add storage handler to capture all logs
-            global _storage_handler  # noqa: PLW0603
+            global _storage_handler
             _storage_handler = StorageHandler(self._storage)
             _storage_handler.setFormatter(_text_formatter)
             _storage_handler.setLevel(log_level_value)
@@ -398,7 +395,7 @@ def get_logger(self, name: str) -> logging.Logger:
 
         return self._loggers[name]
 
-    def get_storage(self) -> Optional[LogStorageService]:
+    def get_storage(self) -> LogStorageService | None:
         """Get the log storage service if available.
 
         Returns:
@@ -408,7 +405,7 @@ def get_storage(self) -> Optional[LogStorageService]:
 
 
 # Global logging service instance
-_logging_service: Optional[LoggingService] = None
+_logging_service: LoggingService | None = None
 
 
 def get_logging_service() -> LoggingService:
@@ -417,7 +414,7 @@ def get_logging_service() -> LoggingService:
     Returns:
         LoggingService instance
     """
-    global _logging_service  # noqa: PLW0603
+    global _logging_service
    if _logging_service is None:
        _logging_service = LoggingService()
    return _logging_service
@@ -447,7 +444,7 @@ async def initialize_logging(
     log_format: str = "text",
     log_to_file: bool = True,
     log_file: str = "rag_modulo.log",
-    log_folder: Optional[str] = "logs",
+    log_folder: str | None = "logs",
     log_rotation_enabled: bool = True,
     log_max_size_mb: int = 10,
     log_backup_count: int = 5,
```

backend/core/enhanced_logging_example.py

Lines changed: 4 additions & 6 deletions
```diff
@@ -11,7 +11,6 @@
 """
 
 import asyncio
-from typing import Optional
 
 from pydantic import UUID4
 
@@ -207,7 +206,7 @@ async def _synthesize_answers(answers: list[dict]) -> str:
     return "Synthesized final answer based on sub-answers"
 
 
-async def example_error_handling(collection_id: UUID4, user_id: UUID4) -> Optional[dict]:
+async def example_error_handling(collection_id: UUID4, user_id: UUID4) -> dict | None:
     """Example error handling with enhanced logging.
 
     Demonstrates how errors are automatically logged with context.
@@ -226,10 +225,9 @@ async def example_error_handling(collection_id: UUID4, user_id: UUID4) -> Option
             entity_type="collection",
             entity_id=str(collection_id),
             user_id=str(user_id),
-        ):
-            with pipeline_stage_context(PipelineStage.DOCUMENT_PROCESSING):
-                # Simulate an error
-                raise ValueError("Simulated processing error")
+        ), pipeline_stage_context(PipelineStage.DOCUMENT_PROCESSING):
+            # Simulate an error
+            raise ValueError("Simulated processing error")
     except ValueError as e:
         # Error is automatically logged by log_operation context manager
         # with full context, timing, and stack trace
```
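The combined-`with` rewrite above is behavior-preserving: listing two context managers on one `with` line enters and exits them in the same order as the nested form. A small self-contained check:

```python
from collections.abc import Iterator
from contextlib import contextmanager

events: list[str] = []


@contextmanager
def track(name: str) -> Iterator[None]:
    """Record enter/exit order so the two forms can be compared."""
    events.append(f"enter {name}")
    try:
        yield
    finally:
        events.append(f"exit {name}")


# Nested form (before the change)
with track("outer"):
    with track("inner"):
        pass

nested_order = events.copy()
events.clear()

# Combined form (after the change)
with track("outer"), track("inner"):
    pass

assert events == nested_order  # identical enter/exit order
```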
