Skip to content

Commit 5e49076

Browse files
authored
Merge pull request #667 from manavgup/feature/rag-improvements-markdown-copy-button
feat: RAG improvements - Markdown formatting, copy button, and code cleanup
2 parents 30c3695 + ef0482e commit 5e49076

File tree

15 files changed

+1140
-71
lines changed

15 files changed

+1140
-71
lines changed

backend/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class Settings(BaseSettings):
159159
reranker_type: Annotated[str, Field(default="llm", alias="RERANKER_TYPE")] # Options: llm, simple, cross-encoder
160160
reranker_top_k: Annotated[
161161
int | None, Field(default=5, alias="RERANKER_TOP_K")
162-
] # Default 5 for optimal quality/speed
162+
] # Number of top results to return after reranking
163163
reranker_batch_size: Annotated[int, Field(default=10, alias="RERANKER_BATCH_SIZE")]
164164
reranker_score_scale: Annotated[int, Field(default=10, alias="RERANKER_SCORE_SCALE")] # 0-10 scoring scale
165165
reranker_prompt_template_name: Annotated[

backend/rag_solution/services/answer_synthesizer.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
"""Answer synthesizer component for Chain of Thought reasoning."""
22

33
from core.config import Settings, get_settings
4+
from core.logging_utils import get_logger
45
from rag_solution.generation.providers.base import LLMBase
56
from rag_solution.schemas.chain_of_thought_schema import ReasoningStep, SynthesisResult
67

8+
logger = get_logger(__name__)
9+
710

811
class AnswerSynthesizer:
912
"""Component for synthesizing answers from reasoning steps."""
@@ -19,14 +22,14 @@ def __init__(self, llm_service: LLMBase | None = None, settings: Settings | None
1922
self.settings = settings or get_settings()
2023

2124
def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> str:
22-
"""Synthesize a final answer from reasoning steps.
25+
"""Synthesize a final answer from reasoning steps with proper Markdown formatting.
2326
2427
Args:
2528
original_question: The original question.
2629
reasoning_steps: The reasoning steps taken.
2730
2831
Returns:
29-
The synthesized final answer.
32+
The synthesized final answer formatted in Markdown.
3033
"""
3134
if not reasoning_steps:
3235
return "Unable to generate an answer due to insufficient information."
@@ -37,22 +40,36 @@ def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep
3740
if not intermediate_answers:
3841
return "Unable to synthesize an answer from the reasoning steps."
3942

40-
# Simple synthesis (in production, this would use an LLM)
43+
# Simple synthesis with Markdown formatting
4144
if len(intermediate_answers) == 1:
4245
return intermediate_answers[0]
4346

44-
# Combine multiple answers
45-
synthesis = f"Based on the analysis of {original_question}: "
47+
# Combine multiple answers with proper Markdown structure
48+
synthesis_parts = []
4649

47-
for i, answer in enumerate(intermediate_answers):
48-
if i == 0:
49-
synthesis += answer
50-
elif i == len(intermediate_answers) - 1:
51-
synthesis += f" Additionally, {answer.lower()}"
52-
else:
53-
synthesis += f" Furthermore, {answer.lower()}"
50+
# Add main answer header
51+
synthesis_parts.append(f"## Answer to: {original_question}\n")
52+
53+
# Add each reasoning step as a section
54+
for i, (step, answer) in enumerate(zip(reasoning_steps, intermediate_answers, strict=False), 1):
55+
synthesis_parts.append(f"### Step {i}: {step.question}\n")
56+
synthesis_parts.append(f"{answer}\n")
57+
58+
# Add summary section if multiple steps
59+
if len(intermediate_answers) > 1:
60+
synthesis_parts.append("### Summary\n")
61+
synthesis_parts.append(f"Based on the analysis above, {intermediate_answers[0]}")
62+
63+
remaining_answers = intermediate_answers[1:]
64+
for i, answer in enumerate(remaining_answers):
65+
if i == len(remaining_answers) - 1:
66+
# Last item in remaining answers
67+
synthesis_parts.append(f" Additionally, {answer.lower()}")
68+
else:
69+
synthesis_parts.append(f" Furthermore, {answer.lower()}")
5470

55-
return synthesis
71+
result = "\n".join(synthesis_parts)
72+
return result
5673

5774
async def synthesize_answer(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> SynthesisResult:
5875
"""Synthesize answer and return result object like tests expect.

backend/rag_solution/services/chain_of_thought_service.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,19 @@ def _generate_llm_response(
245245
logger.warning("LLM service %s does not have generate_text_with_usage method", type(llm_service))
246246
return f"Based on the context, {question.lower().replace('?', '')}...", None
247247

248-
# Create a proper prompt with context
249-
prompt = f"Question: {question}\n\nContext: {' '.join(context)}\n\nAnswer:"
248+
# Create a proper prompt with context and request Markdown formatting
249+
prompt = f"""Question: {question}
250+
251+
Context: {" ".join(context)}
252+
253+
Please provide a detailed answer using proper Markdown formatting:
254+
- Use ## for main headers
255+
- Use ### for sub-headers
256+
- Use bullet points (-) for lists
257+
- Use **bold** for emphasis
258+
- Use tables when presenting data
259+
260+
Answer:"""
250261

251262
try:
252263
from rag_solution.schemas.llm_usage_schema import ServiceType
@@ -462,6 +473,14 @@ async def execute_chain_of_thought(
462473
# Synthesize final answer
463474
final_answer = self.answer_synthesizer.synthesize(cot_input.question, reasoning_steps)
464475

476+
# DEBUG: Log final answer before returning
477+
logger.info("🔍 COT_SERVICE: Final answer length: %d chars", len(final_answer))
478+
logger.debug("🔍 COT_SERVICE: First 200 chars: %s", final_answer[:200])
479+
if "##" in final_answer or "###" in final_answer:
480+
logger.info("✅ COT_SERVICE: Markdown headers present in final answer")
481+
else:
482+
logger.warning("⚠️ COT_SERVICE: NO Markdown headers in final answer")
483+
465484
# Generate source summary
466485
source_summary = self.source_attribution_service.aggregate_sources_across_steps(reasoning_steps)
467486

backend/rag_solution/services/prompt_template_service.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,41 @@ def format_prompt_with_template(self, template: PromptTemplateBase, variables: d
225225
raise ValidationError(f"Failed to format prompt: {e!s}") from e
226226

227227
def _format_prompt_with_template(self, template: PromptTemplateBase, variables: dict[str, Any]) -> str:
228-
"""Internal method to format prompt with a template object."""
228+
"""Internal method to format prompt with a template object.
229+
230+
For RAG_QUERY and COT_REASONING templates, automatically appends Markdown formatting instructions
231+
to ensure well-structured, readable responses.
232+
233+
Issue #655: Add explicit Markdown formatting requests to improve LLM output structure.
234+
"""
229235
parts = []
230236
if template.system_prompt:
231237
parts.append(str(template.system_prompt))
238+
239+
# Add Markdown formatting instructions for RAG_QUERY and COT_REASONING templates (Issue #655)
240+
if template.template_type in (PromptTemplateType.RAG_QUERY, PromptTemplateType.COT_REASONING):
241+
markdown_instructions = (
242+
"\n\nIMPORTANT - Response Formatting Requirements:\n"
243+
"Format your response in clean, well-structured Markdown:\n"
244+
"- Use ## for main sections and ### for subsections\n"
245+
"- Separate paragraphs with blank lines for better readability\n"
246+
"- Keep paragraphs concise (2-4 sentences maximum)\n"
247+
"- Use bullet points (-) for lists of items\n"
248+
"- Use numbered lists (1., 2., 3.) for sequential steps\n"
249+
"- Use **bold** for key concepts and important terms\n"
250+
"- Use *italic* for emphasis or definitions\n"
251+
"- Format tables using Markdown table syntax with | separators\n"
252+
"- Add blank lines before and after tables, lists, and code blocks\n"
253+
"\nExample structure:\n"
254+
"## Main Topic\n\n"
255+
"Brief introduction paragraph.\n\n"
256+
"### Key Points\n\n"
257+
"- First important point\n"
258+
"- Second important point\n\n"
259+
"Explanatory paragraph with **key terms** highlighted."
260+
)
261+
parts.append(markdown_instructions)
262+
232263
parts.append(template.template_format.format(**variables))
233264
return "\n\n".join(parts)
234265

backend/rag_solution/services/search_service.py

Lines changed: 89 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=too-many-lines
33
# Justification: Search service orchestrates multiple complex search paths
44

5+
import re
56
import time
67
from collections.abc import Callable
78
from functools import wraps
@@ -42,6 +43,13 @@
4243

4344
logger = get_logger("services.search")
4445

46+
# Pre-compiled regex pattern for HTML tag detection (optimized for performance)
47+
# Matches the start of common opening HTML tags (prefix match, no word boundary, so e.g. "<p" also matches "<pre"): table, div, p, b, strong, em, i, a, ul, ol, li, h1-h6, code, pre, blockquote, img, br, hr
48+
_HTML_TAG_PATTERN = re.compile(
49+
r"<(?:table|div|p|b|strong|em|i|a\s|ul|ol|li|h[1-6]|code|pre|blockquote|img\s|br|hr)",
50+
re.IGNORECASE,
51+
)
52+
4553
T = TypeVar("T")
4654
P = ParamSpec("P")
4755

@@ -352,37 +360,102 @@ def _generate_document_metadata(
352360

353361
def _clean_generated_answer(self, answer: str) -> str:
354362
"""
355-
Clean generated answer by removing artifacts and duplicates.
363+
Clean generated answer by removing artifacts and duplicates while preserving Markdown.
356364
357365
Removes:
358366
- " AND " artifacts from query rewriting
359-
- Duplicate consecutive words
367+
- Duplicate consecutive words (except in Markdown headers)
360368
- Leading/trailing whitespace
369+
370+
Converts:
371+
- HTML formatting to Markdown (tables, bold, italic, links, lists, etc.)
372+
373+
Preserves:
374+
- Markdown headers (##, ###, etc.)
375+
- Markdown formatting (bold, italic, lists, code blocks, etc.)
376+
377+
This ensures proper rendering in the React frontend which uses ReactMarkdown.
378+
Issue #655: Support all HTML formatting types, not just tables.
361379
"""
362380
# pylint: disable=import-outside-toplevel
363-
# Justification: Lazy import to avoid loading re module unless needed
364-
import re
381+
# Justification: Lazy import to avoid loading html2text unless needed
382+
import html2text
365383

366384
cleaned = answer.strip()
367385

368-
# Remove " AND " artifacts that come from query rewriting
386+
# Convert HTML to Markdown if HTML tags detected
387+
# Optimized: Use single pre-compiled regex pattern instead of 20+ searches
388+
if "<" in cleaned and ">" in cleaned and _HTML_TAG_PATTERN.search(cleaned):
389+
try:
390+
# Configure html2text for clean Markdown conversion
391+
h = html2text.HTML2Text()
392+
h.body_width = 0 # Don't wrap lines
393+
h.unicode_snob = True # Use Unicode characters
394+
h.ignore_links = False # Keep links
395+
h.ignore_images = False # Keep images
396+
h.ignore_emphasis = False # Keep bold/italic
397+
h.skip_internal_links = False # Keep all links
398+
h.inline_links = True # Use inline link format [text](url)
399+
h.protect_links = True # Don't mangle URLs
400+
h.wrap_links = False # Don't wrap links
401+
h.wrap_lists = False # Don't wrap lists
402+
403+
# Convert HTML to Markdown
404+
cleaned = h.handle(cleaned)
405+
except Exception as e:
406+
# If HTML conversion fails, log warning and continue with original text
407+
logger.warning("Failed to convert HTML to Markdown: %s", e)
408+
# Continue with original cleaned text (HTML tags will remain but ReactMarkdown handles them safely)
409+
410+
# STEP 1: Protect Markdown headers and formatting before cleaning
411+
# Extract and protect Markdown headers (##, ###, etc.)
412+
markdown_header_pattern = re.compile(r"^(#{1,6}\s+.+)$", re.MULTILINE)
413+
markdown_headers = markdown_header_pattern.findall(cleaned)
414+
header_placeholders = {}
415+
416+
# Replace all occurrences of each header to handle duplicates correctly
417+
for i, header in enumerate(markdown_headers):
418+
placeholder = f"__MDHEADER_{i}__"
419+
header_placeholders[placeholder] = header
420+
# Replace all occurrences of this header (not just first)
421+
cleaned = cleaned.replace(header, placeholder)
422+
423+
# STEP 2: Remove " AND " artifacts that come from query rewriting
369424
# Handle both middle "AND" and trailing "AND"
370425
cleaned = re.sub(r"\s+AND\s+", " ", cleaned) # Middle ANDs
371426
cleaned = re.sub(r"\s+AND$", "", cleaned) # Trailing AND
372427

373-
# Remove duplicate consecutive words
374-
words = cleaned.split()
375-
deduplicated_words = []
376-
prev_word = None
428+
# STEP 3: Remove duplicate consecutive words (but NOT in protected headers)
429+
lines = cleaned.split("\n")
430+
processed_lines = []
431+
432+
for line in lines:
433+
# Skip deduplication for lines with header placeholders
434+
if any(placeholder in line for placeholder in header_placeholders):
435+
processed_lines.append(line)
436+
continue
437+
438+
# Process regular lines
439+
words = line.split()
440+
deduplicated_words = []
441+
prev_word = None
442+
443+
for word in words:
444+
if not prev_word or word.lower() != prev_word.lower():
445+
deduplicated_words.append(word)
446+
prev_word = word # Always update prev_word for next iteration
447+
448+
processed_lines.append(" ".join(deduplicated_words))
449+
450+
result = "\n".join(processed_lines)
377451

378-
for word in words:
379-
if not prev_word or word.lower() != prev_word.lower():
380-
deduplicated_words.append(word)
381-
prev_word = word
452+
# STEP 4: Restore Markdown headers
453+
for placeholder, header in header_placeholders.items():
454+
result = result.replace(placeholder, header)
382455

383-
# Join back and clean up any multiple spaces
384-
result = " ".join(deduplicated_words)
385-
result = re.sub(r"\s+", " ", result).strip()
456+
# STEP 5: Clean up any multiple spaces (but preserve newlines)
457+
result = re.sub(r" +", " ", result) # Multiple spaces to single space
458+
result = result.strip()
386459

387460
return result
388461

backend/rag_solution/services/user_provider_service.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -122,19 +122,7 @@ def _create_default_rag_template(self, user_id: UUID4) -> PromptTemplateOutput:
122122
name="default-rag-template",
123123
user_id=user_id,
124124
template_type=PromptTemplateType.RAG_QUERY,
125-
system_prompt=(
126-
"You are a helpful AI assistant specializing in answering questions based on the given context. "
127-
"Answer ONLY the user's question that is provided. "
128-
"Do not generate additional questions or topics. "
129-
"Provide a single, focused, concise answer based on the context.\n\n"
130-
"Format your responses using Markdown for better readability:\n"
131-
"- Use **bold** for emphasis on key points\n"
132-
"- Use bullet points (- or *) for lists\n"
133-
"- Use numbered lists (1. 2. 3.) for sequential steps\n"
134-
"- Use `code blocks` for technical terms or code\n"
135-
"- Use proper headings (## or ###) for sections when appropriate\n"
136-
"- Keep answers well-structured and concise"
137-
),
125+
system_prompt="Answer the question based on the context using clear, well-formatted Markdown.",
138126
template_format="{context}\n\n{question}",
139127
input_variables={
140128
"context": "Retrieved context for answering the question",
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Deprecation Notice: watsonx.py
2+
3+
## Status: LEGACY - Scheduled for Removal
4+
5+
The `backend/vectordbs/utils/watsonx.py` file is a **duplicate** of the modern provider implementation at `backend/rag_solution/generation/providers/watsonx.py`.
6+
7+
## Current Usage
8+
9+
This legacy file is still used by:
10+
11+
1. `backend/rag_solution/evaluation/llm_as_judge_evals.py` - Uses `generate_batch`, `generate_text`, `get_model`
12+
2. `backend/rag_solution/data_ingestion/chunking.py` - Uses `get_tokenization` (deprecated function)
13+
3. `backend/rag_solution/query_rewriting/query_rewriter.py` - Uses `generate_text`
14+
4. `backend/tests/unit/test_settings_dependency_injection.py` - Test file
15+
16+
## Migration Plan
17+
18+
### Phase 1: Create Utility Wrappers (Recommended)
19+
20+
In the provider file, create utility functions that do not require user context, so that evaluation and other utility code can call them.
21+
22+
### Phase 2: Update Imports
23+
24+
Update all files to import from the provider file instead.
25+
26+
### Phase 3: Remove Duplicate
27+
28+
Delete `backend/vectordbs/utils/watsonx.py` once all imports are migrated.
29+
30+
## Why Keep It For Now?
31+
32+
These are utility functions used in:
33+
34+
- **Evaluation pipelines** (llm_as_judge_evals.py) - Runs outside normal request flow
35+
- **Data ingestion** (chunking.py) - Preprocessing, no user context
36+
- **Query rewriting** (query_rewriter.py) - Utility function
37+
38+
These call sites run without the user context that the modern LLMProviderFactory pattern requires.
39+
40+
## Recommendation
41+
42+
**DO NOT** remove this file until:
43+
44+
1. Utility wrapper functions are created in the provider
45+
2. All imports are updated and tested
46+
3. Evaluation and ingestion pipelines are verified to work
47+
48+
## Date: 2025-01-25
49+
50+
## Issue: Duplicate watsonx.py files identified during RAG improvements

0 commit comments

Comments
 (0)