2 changes: 1 addition & 1 deletion backend/core/config.py
@@ -159,7 +159,7 @@ class Settings(BaseSettings):
reranker_type: Annotated[str, Field(default="llm", alias="RERANKER_TYPE")] # Options: llm, simple, cross-encoder
reranker_top_k: Annotated[
int | None, Field(default=5, alias="RERANKER_TOP_K")
-    ] # Default 5 for optimal quality/speed
] # Number of top results to return after reranking
reranker_batch_size: Annotated[int, Field(default=10, alias="RERANKER_BATCH_SIZE")]
reranker_score_scale: Annotated[int, Field(default=10, alias="RERANKER_SCORE_SCALE")] # 0-10 scoring scale
reranker_prompt_template_name: Annotated[
43 changes: 30 additions & 13 deletions backend/rag_solution/services/answer_synthesizer.py
@@ -1,9 +1,12 @@
"""Answer synthesizer component for Chain of Thought reasoning."""

from core.config import Settings, get_settings
from core.logging_utils import get_logger
from rag_solution.generation.providers.base import LLMBase
from rag_solution.schemas.chain_of_thought_schema import ReasoningStep, SynthesisResult

logger = get_logger(__name__)


class AnswerSynthesizer:
"""Component for synthesizing answers from reasoning steps."""
@@ -19,14 +22,14 @@ def __init__(self, llm_service: LLMBase | None = None, settings: Settings | None
self.settings = settings or get_settings()

def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> str:
"""Synthesize a final answer from reasoning steps.
"""Synthesize a final answer from reasoning steps with proper Markdown formatting.

Args:
original_question: The original question.
reasoning_steps: The reasoning steps taken.

Returns:
-            The synthesized final answer.
The synthesized final answer formatted in Markdown.
"""
if not reasoning_steps:
return "Unable to generate an answer due to insufficient information."
@@ -37,22 +40,36 @@ def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep
if not intermediate_answers:
return "Unable to synthesize an answer from the reasoning steps."

-        # Simple synthesis (in production, this would use an LLM)
# Simple synthesis with Markdown formatting
if len(intermediate_answers) == 1:
return intermediate_answers[0]

-        # Combine multiple answers
-        synthesis = f"Based on the analysis of {original_question}: "
# Combine multiple answers with proper Markdown structure
synthesis_parts = []

-        for i, answer in enumerate(intermediate_answers):
-            if i == 0:
-                synthesis += answer
-            elif i == len(intermediate_answers) - 1:
-                synthesis += f" Additionally, {answer.lower()}"
-            else:
-                synthesis += f" Furthermore, {answer.lower()}"
# Add main answer header
synthesis_parts.append(f"## Answer to: {original_question}\n")

# Add each reasoning step as a section
for i, (step, answer) in enumerate(zip(reasoning_steps, intermediate_answers, strict=False), 1):
synthesis_parts.append(f"### Step {i}: {step.question}\n")
synthesis_parts.append(f"{answer}\n")

# Add summary section if multiple steps
if len(intermediate_answers) > 1:
synthesis_parts.append("### Summary\n")
synthesis_parts.append(f"Based on the analysis above, {intermediate_answers[0]}")

remaining_answers = intermediate_answers[1:]
for i, answer in enumerate(remaining_answers):
if i == len(remaining_answers) - 1:
# Last item in remaining answers
synthesis_parts.append(f" Additionally, {answer.lower()}")
else:
synthesis_parts.append(f" Furthermore, {answer.lower()}")

-        return synthesis
result = "\n".join(synthesis_parts)
return result
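
    # For intuition, synthesize() output for two reasoning steps is shaped
    # roughly like the sketch below (placeholder values; the parts are joined
    # with "\n", and each section string already ends in "\n", which creates
    # the blank lines between sections):
    #
    #   ## Answer to: <original question>
    #
    #   ### Step 1: <first sub-question>
    #
    #   <first intermediate answer>
    #
    #   ### Step 2: <second sub-question>
    #
    #   <second intermediate answer>
    #
    #   ### Summary
    #
    #   Based on the analysis above, <first answer>
    #    Additionally, <second answer, lower-cased>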

async def synthesize_answer(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> SynthesisResult:
"""Synthesize answer and return result object like tests expect.
23 changes: 21 additions & 2 deletions backend/rag_solution/services/chain_of_thought_service.py
@@ -245,8 +245,19 @@ def _generate_llm_response(
logger.warning("LLM service %s does not have generate_text_with_usage method", type(llm_service))
return f"Based on the context, {question.lower().replace('?', '')}...", None

-        # Create a proper prompt with context
-        prompt = f"Question: {question}\n\nContext: {' '.join(context)}\n\nAnswer:"
# Create a proper prompt with context and request Markdown formatting
prompt = f"""Question: {question}

Context: {" ".join(context)}

Please provide a detailed answer using proper Markdown formatting:
- Use ## for main headers
- Use ### for sub-headers
- Use bullet points (-) for lists
- Use **bold** for emphasis
- Use tables when presenting data

Answer:"""

try:
from rag_solution.schemas.llm_usage_schema import ServiceType
@@ -462,6 +473,14 @@ async def execute_chain_of_thought(
# Synthesize final answer
final_answer = self.answer_synthesizer.synthesize(cot_input.question, reasoning_steps)

# DEBUG: Log final answer before returning
logger.info("🔍 COT_SERVICE: Final answer length: %d chars", len(final_answer))
logger.debug("🔍 COT_SERVICE: First 200 chars: %s", final_answer[:200])
if "##" in final_answer or "###" in final_answer:
logger.info("✅ COT_SERVICE: Markdown headers present in final answer")
else:
logger.warning("⚠️ COT_SERVICE: NO Markdown headers in final answer")

# Generate source summary
source_summary = self.source_attribution_service.aggregate_sources_across_steps(reasoning_steps)

33 changes: 32 additions & 1 deletion backend/rag_solution/services/prompt_template_service.py
@@ -225,10 +225,41 @@ def format_prompt_with_template(self, template: PromptTemplateBase, variables: d
raise ValidationError(f"Failed to format prompt: {e!s}") from e

def _format_prompt_with_template(self, template: PromptTemplateBase, variables: dict[str, Any]) -> str:
"""Internal method to format prompt with a template object."""
"""Internal method to format prompt with a template object.

For RAG_QUERY templates, automatically appends Markdown formatting instructions
to ensure well-structured, readable responses.

Issue #655: Add explicit Markdown formatting requests to improve LLM output structure.
"""
parts = []
if template.system_prompt:
parts.append(str(template.system_prompt))

# Add Markdown formatting instructions for RAG_QUERY and COT_REASONING templates (Issue #655)
if template.template_type in (PromptTemplateType.RAG_QUERY, PromptTemplateType.COT_REASONING):
markdown_instructions = (
"\n\nIMPORTANT - Response Formatting Requirements:\n"
"Format your response in clean, well-structured Markdown:\n"
"- Use ## for main sections and ### for subsections\n"
"- Separate paragraphs with blank lines for better readability\n"
"- Keep paragraphs concise (2-4 sentences maximum)\n"
"- Use bullet points (-) for lists of items\n"
"- Use numbered lists (1., 2., 3.) for sequential steps\n"
"- Use **bold** for key concepts and important terms\n"
"- Use *italic* for emphasis or definitions\n"
"- Format tables using Markdown table syntax with | separators\n"
"- Add blank lines before and after tables, lists, and code blocks\n"
"\nExample structure:\n"
"## Main Topic\n\n"
"Brief introduction paragraph.\n\n"
"### Key Points\n\n"
"- First important point\n"
"- Second important point\n\n"
"Explanatory paragraph with **key terms** highlighted."
)
parts.append(markdown_instructions)

parts.append(template.template_format.format(**variables))
return "\n\n".join(parts)
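
    # Illustrative assembly for a RAG_QUERY template (invented values): with the
    # default system prompt "Answer the question based on the context using
    # clear, well-formatted Markdown." and template_format "{context}\n\n{question}",
    # the returned prompt is the system prompt, then the Markdown formatting
    # instructions above, then the filled template, joined by blank lines.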

105 changes: 89 additions & 16 deletions backend/rag_solution/services/search_service.py
@@ -2,6 +2,7 @@
# pylint: disable=too-many-lines
# Justification: Search service orchestrates multiple complex search paths

import re
import time
from collections.abc import Callable
from functools import wraps
@@ -42,6 +43,13 @@

logger = get_logger("services.search")

# Pre-compiled regex pattern for HTML tag detection (optimized for performance)
# Matches common HTML tags: table, div, p, b, strong, em, i, a, ul, ol, li, h1-h6, code, pre, blockquote, img, br, hr
_HTML_TAG_PATTERN = re.compile(
r"<(?:table|div|p|b|strong|em|i|a\s|ul|ol|li|h[1-6]|code|pre|blockquote|img\s|br|hr)",
re.IGNORECASE,
)
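
# Illustrative behavior of the heuristic above (values invented): the pattern
# matches real tag openings such as "<table>" or "<h2>", but not a bare
# comparison like "a < b and c > d", since a known tag name must immediately
# follow "<".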

T = TypeVar("T")
P = ParamSpec("P")

@@ -352,37 +360,102 @@ def _generate_document_metadata(

def _clean_generated_answer(self, answer: str) -> str:
"""
-        Clean generated answer by removing artifacts and duplicates.
Clean generated answer by removing artifacts and duplicates while preserving Markdown.

Removes:
- " AND " artifacts from query rewriting
-        - Duplicate consecutive words
- Duplicate consecutive words (except in Markdown headers)
- Leading/trailing whitespace

Converts:
- HTML formatting to Markdown (tables, bold, italic, links, lists, etc.)

Preserves:
- Markdown headers (##, ###, etc.)
- Markdown formatting (bold, italic, lists, code blocks, etc.)

        This ensures proper rendering in the React frontend, which uses ReactMarkdown.
Issue #655: Support all HTML formatting types, not just tables.
"""
# pylint: disable=import-outside-toplevel
-        # Justification: Lazy import to avoid loading re module unless needed
-        import re
# Justification: Lazy import to avoid loading html2text unless needed
import html2text

cleaned = answer.strip()

-        # Remove " AND " artifacts that come from query rewriting
# Convert HTML to Markdown if HTML tags detected
# Optimized: Use single pre-compiled regex pattern instead of 20+ searches
if "<" in cleaned and ">" in cleaned and _HTML_TAG_PATTERN.search(cleaned):
try:
# Configure html2text for clean Markdown conversion
h = html2text.HTML2Text()
h.body_width = 0 # Don't wrap lines
h.unicode_snob = True # Use Unicode characters
h.ignore_links = False # Keep links
h.ignore_images = False # Keep images
h.ignore_emphasis = False # Keep bold/italic
h.skip_internal_links = False # Keep all links
h.inline_links = True # Use inline link format [text](url)
h.protect_links = True # Don't mangle URLs
h.wrap_links = False # Don't wrap links
h.wrap_lists = False # Don't wrap lists

# Convert HTML to Markdown
cleaned = h.handle(cleaned)
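                # e.g. "<b>key term</b>" becomes "**key term**" (illustrative)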
except Exception as e:
# If HTML conversion fails, log warning and continue with original text
logger.warning("Failed to convert HTML to Markdown: %s", e)
# Continue with original cleaned text (HTML tags will remain but ReactMarkdown handles them safely)

# STEP 1: Protect Markdown headers and formatting before cleaning
# Extract and protect Markdown headers (##, ###, etc.)
markdown_header_pattern = re.compile(r"^(#{1,6}\s+.+)$", re.MULTILINE)
markdown_headers = markdown_header_pattern.findall(cleaned)
header_placeholders = {}

# Replace all occurrences of each header to handle duplicates correctly
for i, header in enumerate(markdown_headers):
placeholder = f"__MDHEADER_{i}__"
header_placeholders[placeholder] = header
# Replace all occurrences of this header (not just first)
cleaned = cleaned.replace(header, placeholder)
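            # e.g. "## Summary" becomes "__MDHEADER_0__" here and is restored in STEP 4 (illustrative)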

# STEP 2: Remove " AND " artifacts that come from query rewriting
# Handle both middle "AND" and trailing "AND"
cleaned = re.sub(r"\s+AND\s+", " ", cleaned) # Middle ANDs
cleaned = re.sub(r"\s+AND$", "", cleaned) # Trailing AND
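        # e.g. "uses RAG AND vector search AND" -> "uses RAG vector search" (illustrative)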

-        # Remove duplicate consecutive words
-        words = cleaned.split()
-        deduplicated_words = []
-        prev_word = None
# STEP 3: Remove duplicate consecutive words (but NOT in protected headers)
lines = cleaned.split("\n")
processed_lines = []

for line in lines:
# Skip deduplication for lines with header placeholders
if any(placeholder in line for placeholder in header_placeholders):
processed_lines.append(line)
continue

# Process regular lines
words = line.split()
deduplicated_words = []
prev_word = None

for word in words:
if not prev_word or word.lower() != prev_word.lower():
deduplicated_words.append(word)
prev_word = word # Always update prev_word for next iteration

processed_lines.append(" ".join(deduplicated_words))

result = "\n".join(processed_lines)

-        for word in words:
-            if not prev_word or word.lower() != prev_word.lower():
-                deduplicated_words.append(word)
-                prev_word = word
# STEP 4: Restore Markdown headers
for placeholder, header in header_placeholders.items():
result = result.replace(placeholder, header)

-        # Join back and clean up any multiple spaces
-        result = " ".join(deduplicated_words)
-        result = re.sub(r"\s+", " ", result).strip()
# STEP 5: Clean up any multiple spaces (but preserve newlines)
result = re.sub(r" +", " ", result) # Multiple spaces to single space
result = result.strip()

return result
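
    # Worked example of the full cleaning pass (illustrative input, non-HTML path):
    #   raw = "## Key Points\nPython Python is popular AND versatile AND"
    #   self._clean_generated_answer(raw)
    #   -> "## Key Points\nPython is popular versatile"
    # The header survives deduplication thanks to the placeholder protection,
    # both "AND" artifacts are stripped, and "Python Python" collapses to "Python".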

14 changes: 1 addition & 13 deletions backend/rag_solution/services/user_provider_service.py
@@ -122,19 +122,7 @@ def _create_default_rag_template(self, user_id: UUID4) -> PromptTemplateOutput:
name="default-rag-template",
user_id=user_id,
template_type=PromptTemplateType.RAG_QUERY,
-            system_prompt=(
-                "You are a helpful AI assistant specializing in answering questions based on the given context. "
-                "Answer ONLY the user's question that is provided. "
-                "Do not generate additional questions or topics. "
-                "Provide a single, focused, concise answer based on the context.\n\n"
-                "Format your responses using Markdown for better readability:\n"
-                "- Use **bold** for emphasis on key points\n"
-                "- Use bullet points (- or *) for lists\n"
-                "- Use numbered lists (1. 2. 3.) for sequential steps\n"
-                "- Use `code blocks` for technical terms or code\n"
-                "- Use proper headings (## or ###) for sections when appropriate\n"
-                "- Keep answers well-structured and concise"
-            ),
system_prompt="Answer the question based on the context using clear, well-formatted Markdown.",
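            # Markdown formatting guidance is now appended centrally in PromptTemplateService for RAG_QUERY templates (Issue #655)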
template_format="{context}\n\n{question}",
input_variables={
"context": "Retrieved context for answering the question",
50 changes: 50 additions & 0 deletions backend/vectordbs/utils/DEPRECATION_NOTICE.md
@@ -0,0 +1,50 @@
# Deprecation Notice: watsonx.py

## Status: LEGACY - Scheduled for Removal

The `backend/vectordbs/utils/watsonx.py` file is a **duplicate** of the modern provider implementation at `backend/rag_solution/generation/providers/watsonx.py`.

## Current Usage

This legacy file is still used by:

1. `backend/rag_solution/evaluation/llm_as_judge_evals.py` - Uses `generate_batch`, `generate_text`, `get_model`
2. `backend/rag_solution/data_ingestion/chunking.py` - Uses `get_tokenization` (deprecated function)
3. `backend/rag_solution/query_rewriting/query_rewriter.py` - Uses `generate_text`
4. `backend/tests/unit/test_settings_dependency_injection.py` - Test file

## Migration Plan

### Phase 1: Create Utility Wrappers (Recommended)

Create utility functions in the provider file that don't require a user context, so evaluation and other utility code can call them directly.
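
A minimal sketch of what such a wrapper might look like — the `WatsonXProvider` class name, its settings-only constructor, and its `generate_text` method are assumptions about the provider module, not its verified API:

```python
# Hypothetical Phase 1 wrappers added at the bottom of
# rag_solution/generation/providers/watsonx.py, where the provider class
# (assumed name: WatsonXProvider) is already defined.
from functools import lru_cache

from core.config import get_settings


@lru_cache(maxsize=1)
def _default_provider() -> "WatsonXProvider":
    # Assumed: the provider can be constructed from settings alone,
    # without going through the user-scoped LLMProviderFactory.
    return WatsonXProvider(settings=get_settings())


def generate_text(prompt: str) -> str:
    """User-context-free stand-in for vectordbs.utils.watsonx.generate_text."""
    return _default_provider().generate_text(prompt)
```

With a wrapper like this in place, `llm_as_judge_evals.py`, `chunking.py`, and `query_rewriter.py` could migrate without gaining a dependency on per-user provider configuration.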

### Phase 2: Update Imports

Update all files to import from the provider file instead.
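
For example, `query_rewriter.py` would then switch a single import, assuming the Phase 1 wrapper keeps the legacy function name:

```python
# Before (legacy duplicate):
from vectordbs.utils.watsonx import generate_text

# After (Phase 1 wrapper in the modern provider module):
from rag_solution.generation.providers.watsonx import generate_text
```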

### Phase 3: Remove Duplicate

Delete `backend/vectordbs/utils/watsonx.py` once all imports are migrated.

## Why Keep It For Now?

These are utility functions used in:

- **Evaluation pipelines** (llm_as_judge_evals.py) - Runs outside normal request flow
- **Data ingestion** (chunking.py) - Preprocessing, no user context
- **Query rewriting** (query_rewriter.py) - Utility function

These call sites don't have the user context that the modern LLMProviderFactory pattern requires.

## Recommendation

**DO NOT** remove this file until:

1. Utility wrapper functions are created in the provider
2. All imports are updated and tested
3. Evaluation and ingestion pipelines are verified to work

## Date: 2025-01-25

## Issue: Duplicate watsonx.py files identified during RAG improvements