2 changes: 1 addition & 1 deletion backend/core/config.py
@@ -159,7 +159,7 @@ class Settings(BaseSettings):
reranker_type: Annotated[str, Field(default="llm", alias="RERANKER_TYPE")] # Options: llm, simple, cross-encoder
reranker_top_k: Annotated[
int | None, Field(default=5, alias="RERANKER_TOP_K")
-    ] # Default 5 for optimal quality/speed
] # Number of top results to return after reranking
reranker_batch_size: Annotated[int, Field(default=10, alias="RERANKER_BATCH_SIZE")]
reranker_score_scale: Annotated[int, Field(default=10, alias="RERANKER_SCORE_SCALE")] # 0-10 scoring scale
reranker_prompt_template_name: Annotated[
43 changes: 30 additions & 13 deletions backend/rag_solution/services/answer_synthesizer.py
@@ -1,9 +1,12 @@
"""Answer synthesizer component for Chain of Thought reasoning."""

from core.config import Settings, get_settings
from core.logging_utils import get_logger
from rag_solution.generation.providers.base import LLMBase
from rag_solution.schemas.chain_of_thought_schema import ReasoningStep, SynthesisResult

logger = get_logger(__name__)


class AnswerSynthesizer:
"""Component for synthesizing answers from reasoning steps."""
@@ -19,14 +22,14 @@ def __init__(self, llm_service: LLMBase | None = None, settings: Settings | None
self.settings = settings or get_settings()

def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> str:
"""Synthesize a final answer from reasoning steps.
"""Synthesize a final answer from reasoning steps with proper Markdown formatting.

Args:
original_question: The original question.
reasoning_steps: The reasoning steps taken.

Returns:
-            The synthesized final answer.
The synthesized final answer formatted in Markdown.
"""
if not reasoning_steps:
return "Unable to generate an answer due to insufficient information."
@@ -37,22 +40,36 @@ def synthesize(self, original_question: str, reasoning_steps: list[ReasoningStep
if not intermediate_answers:
return "Unable to synthesize an answer from the reasoning steps."

-        # Simple synthesis (in production, this would use an LLM)
# Simple synthesis with Markdown formatting
if len(intermediate_answers) == 1:
return intermediate_answers[0]

-        # Combine multiple answers
-        synthesis = f"Based on the analysis of {original_question}: "
# Combine multiple answers with proper Markdown structure
synthesis_parts = []

-        for i, answer in enumerate(intermediate_answers):
-            if i == 0:
-                synthesis += answer
-            elif i == len(intermediate_answers) - 1:
-                synthesis += f" Additionally, {answer.lower()}"
-            else:
-                synthesis += f" Furthermore, {answer.lower()}"
# Add main answer header
synthesis_parts.append(f"## Answer to: {original_question}\n")

# Add each reasoning step as a section
for i, (step, answer) in enumerate(zip(reasoning_steps, intermediate_answers, strict=False), 1):
synthesis_parts.append(f"### Step {i}: {step.question}\n")
synthesis_parts.append(f"{answer}\n")

# Add summary section if multiple steps
if len(intermediate_answers) > 1:
synthesis_parts.append("### Summary\n")
synthesis_parts.append(f"Based on the analysis above, {intermediate_answers[0]}")

remaining_answers = intermediate_answers[1:]
for i, answer in enumerate(remaining_answers):
if i == len(remaining_answers) - 1:
# Last item in remaining answers
synthesis_parts.append(f" Additionally, {answer.lower()}")
else:
synthesis_parts.append(f" Furthermore, {answer.lower()}")

-        return synthesis
result = "\n".join(synthesis_parts)
return result
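
    # For intuition, synthesize() output for two reasoning steps is shaped
    # roughly like the sketch below (placeholder values; the parts are joined
    # with "\n", and each section string already ends in "\n", which creates
    # the blank lines between sections):
    #
    #   ## Answer to: <original question>
    #
    #   ### Step 1: <first sub-question>
    #
    #   <first intermediate answer>
    #
    #   ### Step 2: <second sub-question>
    #
    #   <second intermediate answer>
    #
    #   ### Summary
    #
    #   Based on the analysis above, <first answer>
    #    Additionally, <second answer, lower-cased>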

async def synthesize_answer(self, original_question: str, reasoning_steps: list[ReasoningStep]) -> SynthesisResult:
"""Synthesize answer and return result object like tests expect.
23 changes: 21 additions & 2 deletions backend/rag_solution/services/chain_of_thought_service.py
@@ -245,8 +245,19 @@ def _generate_llm_response(
logger.warning("LLM service %s does not have generate_text_with_usage method", type(llm_service))
return f"Based on the context, {question.lower().replace('?', '')}...", None

-        # Create a proper prompt with context
-        prompt = f"Question: {question}\n\nContext: {' '.join(context)}\n\nAnswer:"
# Create a proper prompt with context and request Markdown formatting
prompt = f"""Question: {question}

Context: {" ".join(context)}

Please provide a detailed answer using proper Markdown formatting:
- Use ## for main headers
- Use ### for sub-headers
- Use bullet points (-) for lists
- Use **bold** for emphasis
- Use tables when presenting data

Answer:"""

try:
from rag_solution.schemas.llm_usage_schema import ServiceType
@@ -462,6 +473,14 @@ async def execute_chain_of_thought(
# Synthesize final answer
final_answer = self.answer_synthesizer.synthesize(cot_input.question, reasoning_steps)

# DEBUG: Log final answer before returning
logger.info("🔍 COT_SERVICE: Final answer length: %d chars", len(final_answer))
logger.debug("🔍 COT_SERVICE: First 200 chars: %s", final_answer[:200])
if "##" in final_answer or "###" in final_answer:
logger.info("✅ COT_SERVICE: Markdown headers present in final answer")
else:
logger.warning("⚠️ COT_SERVICE: NO Markdown headers in final answer")

# Generate source summary
source_summary = self.source_attribution_service.aggregate_sources_across_steps(reasoning_steps)

33 changes: 32 additions & 1 deletion backend/rag_solution/services/prompt_template_service.py
@@ -225,10 +225,41 @@ def format_prompt_with_template(self, template: PromptTemplateBase, variables: d
raise ValidationError(f"Failed to format prompt: {e!s}") from e

def _format_prompt_with_template(self, template: PromptTemplateBase, variables: dict[str, Any]) -> str:
"""Internal method to format prompt with a template object."""
"""Internal method to format prompt with a template object.

For RAG_QUERY templates, automatically appends Markdown formatting instructions
to ensure well-structured, readable responses.

Issue #655: Add explicit Markdown formatting requests to improve LLM output structure.
"""
parts = []
if template.system_prompt:
parts.append(str(template.system_prompt))

# Add Markdown formatting instructions for RAG_QUERY and COT_REASONING templates (Issue #655)
if template.template_type in (PromptTemplateType.RAG_QUERY, PromptTemplateType.COT_REASONING):
markdown_instructions = (
"\n\nIMPORTANT - Response Formatting Requirements:\n"
"Format your response in clean, well-structured Markdown:\n"
"- Use ## for main sections and ### for subsections\n"
"- Separate paragraphs with blank lines for better readability\n"
"- Keep paragraphs concise (2-4 sentences maximum)\n"
"- Use bullet points (-) for lists of items\n"
"- Use numbered lists (1., 2., 3.) for sequential steps\n"
"- Use **bold** for key concepts and important terms\n"
"- Use *italic* for emphasis or definitions\n"
"- Format tables using Markdown table syntax with | separators\n"
"- Add blank lines before and after tables, lists, and code blocks\n"
"\nExample structure:\n"
"## Main Topic\n\n"
"Brief introduction paragraph.\n\n"
"### Key Points\n\n"
"- First important point\n"
"- Second important point\n\n"
"Explanatory paragraph with **key terms** highlighted."
)
parts.append(markdown_instructions)

parts.append(template.template_format.format(**variables))
return "\n\n".join(parts)
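
    # Illustrative assembly for a RAG_QUERY template (invented values): with the
    # default system prompt "Answer the question based on the context using
    # clear, well-formatted Markdown." and template_format "{context}\n\n{question}",
    # the returned prompt is the system prompt, then the Markdown formatting
    # instructions above, then the filled template, joined by blank lines.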

105 changes: 89 additions & 16 deletions backend/rag_solution/services/search_service.py
@@ -2,6 +2,7 @@
# pylint: disable=too-many-lines
# Justification: Search service orchestrates multiple complex search paths

import re
import time
from collections.abc import Callable
from functools import wraps
@@ -42,6 +43,13 @@

logger = get_logger("services.search")

# Pre-compiled regex pattern for HTML tag detection (optimized for performance)
# Matches common HTML tags: table, div, p, b, strong, em, i, a, ul, ol, li, h1-h6, code, pre, blockquote, img, br, hr
_HTML_TAG_PATTERN = re.compile(
r"<(?:table|div|p|b|strong|em|i|a\s|ul|ol|li|h[1-6]|code|pre|blockquote|img\s|br|hr)",
re.IGNORECASE,
)
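
# Illustrative behavior of the heuristic above (values invented): the pattern
# matches real tag openings such as "<table>" or "<h2>", but not a bare
# comparison like "a < b and c > d", since a known tag name must immediately
# follow "<".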

T = TypeVar("T")
P = ParamSpec("P")

@@ -352,37 +360,102 @@ def _generate_document_metadata(

def _clean_generated_answer(self, answer: str) -> str:
"""
-        Clean generated answer by removing artifacts and duplicates.
Clean generated answer by removing artifacts and duplicates while preserving Markdown.

Removes:
- " AND " artifacts from query rewriting
-        - Duplicate consecutive words
- Duplicate consecutive words (except in Markdown headers)
- Leading/trailing whitespace

Converts:
- HTML formatting to Markdown (tables, bold, italic, links, lists, etc.)

Preserves:
- Markdown headers (##, ###, etc.)
- Markdown formatting (bold, italic, lists, code blocks, etc.)

        This ensures proper rendering in the React frontend, which uses ReactMarkdown.
Issue #655: Support all HTML formatting types, not just tables.
"""
# pylint: disable=import-outside-toplevel
-        # Justification: Lazy import to avoid loading re module unless needed
-        import re
# Justification: Lazy import to avoid loading html2text unless needed
import html2text

cleaned = answer.strip()

-        # Remove " AND " artifacts that come from query rewriting
# Convert HTML to Markdown if HTML tags detected
# Optimized: Use single pre-compiled regex pattern instead of 20+ searches
if "<" in cleaned and ">" in cleaned and _HTML_TAG_PATTERN.search(cleaned):
try:
# Configure html2text for clean Markdown conversion
h = html2text.HTML2Text()
h.body_width = 0 # Don't wrap lines
h.unicode_snob = True # Use Unicode characters
h.ignore_links = False # Keep links
h.ignore_images = False # Keep images
h.ignore_emphasis = False # Keep bold/italic
h.skip_internal_links = False # Keep all links
h.inline_links = True # Use inline link format [text](url)
h.protect_links = True # Don't mangle URLs
h.wrap_links = False # Don't wrap links
h.wrap_lists = False # Don't wrap lists

# Convert HTML to Markdown
cleaned = h.handle(cleaned)
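                # e.g. "<b>key term</b>" becomes "**key term**" (illustrative)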
except Exception as e:
# If HTML conversion fails, log warning and continue with original text
logger.warning("Failed to convert HTML to Markdown: %s", e)
# Continue with original cleaned text (HTML tags will remain but ReactMarkdown handles them safely)

# STEP 1: Protect Markdown headers and formatting before cleaning
# Extract and protect Markdown headers (##, ###, etc.)
markdown_header_pattern = re.compile(r"^(#{1,6}\s+.+)$", re.MULTILINE)
markdown_headers = markdown_header_pattern.findall(cleaned)
header_placeholders = {}

# Replace all occurrences of each header to handle duplicates correctly
for i, header in enumerate(markdown_headers):
placeholder = f"__MDHEADER_{i}__"
header_placeholders[placeholder] = header
# Replace all occurrences of this header (not just first)
cleaned = cleaned.replace(header, placeholder)
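            # e.g. "## Summary" becomes "__MDHEADER_0__" here and is restored in STEP 4 (illustrative)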

# STEP 2: Remove " AND " artifacts that come from query rewriting
# Handle both middle "AND" and trailing "AND"
cleaned = re.sub(r"\s+AND\s+", " ", cleaned) # Middle ANDs
cleaned = re.sub(r"\s+AND$", "", cleaned) # Trailing AND
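        # e.g. "uses RAG AND vector search AND" -> "uses RAG vector search" (illustrative)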

-        # Remove duplicate consecutive words
-        words = cleaned.split()
-        deduplicated_words = []
-        prev_word = None
# STEP 3: Remove duplicate consecutive words (but NOT in protected headers)
lines = cleaned.split("\n")
processed_lines = []

for line in lines:
# Skip deduplication for lines with header placeholders
if any(placeholder in line for placeholder in header_placeholders):
processed_lines.append(line)
continue

# Process regular lines
words = line.split()
deduplicated_words = []
prev_word = None

for word in words:
if not prev_word or word.lower() != prev_word.lower():
deduplicated_words.append(word)
prev_word = word # Always update prev_word for next iteration

processed_lines.append(" ".join(deduplicated_words))

result = "\n".join(processed_lines)

-        for word in words:
-            if not prev_word or word.lower() != prev_word.lower():
-                deduplicated_words.append(word)
-                prev_word = word
# STEP 4: Restore Markdown headers
for placeholder, header in header_placeholders.items():
result = result.replace(placeholder, header)

-        # Join back and clean up any multiple spaces
-        result = " ".join(deduplicated_words)
-        result = re.sub(r"\s+", " ", result).strip()
# STEP 5: Clean up any multiple spaces (but preserve newlines)
result = re.sub(r" +", " ", result) # Multiple spaces to single space
result = result.strip()

return result
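
    # Worked example of the full cleaning pass (illustrative input, non-HTML path):
    #   raw = "## Key Points\nPython Python is popular AND versatile AND"
    #   self._clean_generated_answer(raw)
    #   -> "## Key Points\nPython is popular versatile"
    # The header survives deduplication thanks to the placeholder protection,
    # both "AND" artifacts are stripped, and "Python Python" collapses to "Python".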

14 changes: 1 addition & 13 deletions backend/rag_solution/services/user_provider_service.py
@@ -122,19 +122,7 @@ def _create_default_rag_template(self, user_id: UUID4) -> PromptTemplateOutput:
name="default-rag-template",
user_id=user_id,
template_type=PromptTemplateType.RAG_QUERY,
-            system_prompt=(
-                "You are a helpful AI assistant specializing in answering questions based on the given context. "
-                "Answer ONLY the user's question that is provided. "
-                "Do not generate additional questions or topics. "
-                "Provide a single, focused, concise answer based on the context.\n\n"
-                "Format your responses using Markdown for better readability:\n"
-                "- Use **bold** for emphasis on key points\n"
-                "- Use bullet points (- or *) for lists\n"
-                "- Use numbered lists (1. 2. 3.) for sequential steps\n"
-                "- Use `code blocks` for technical terms or code\n"
-                "- Use proper headings (## or ###) for sections when appropriate\n"
-                "- Keep answers well-structured and concise"
-            ),
system_prompt="Answer the question based on the context using clear, well-formatted Markdown.",
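            # Markdown formatting guidance is now appended centrally in PromptTemplateService for RAG_QUERY templates (Issue #655)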
template_format="{context}\n\n{question}",
input_variables={
"context": "Retrieved context for answering the question",
50 changes: 50 additions & 0 deletions backend/vectordbs/utils/DEPRECATION_NOTICE.md
@@ -0,0 +1,50 @@
# Deprecation Notice: watsonx.py

## Status: LEGACY - Scheduled for Removal

The `backend/vectordbs/utils/watsonx.py` file is a **duplicate** of the modern provider implementation at `backend/rag_solution/generation/providers/watsonx.py`.

## Current Usage

This legacy file is still used by:

1. `backend/rag_solution/evaluation/llm_as_judge_evals.py` - Uses `generate_batch`, `generate_text`, `get_model`
2. `backend/rag_solution/data_ingestion/chunking.py` - Uses `get_tokenization` (deprecated function)
3. `backend/rag_solution/query_rewriting/query_rewriter.py` - Uses `generate_text`
4. `backend/tests/unit/test_settings_dependency_injection.py` - Test file

## Migration Plan

### Phase 1: Create Utility Wrappers (Recommended)

Create utility functions in the provider file that don't require a user context, so evaluation and other utility code can call them directly.
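
A minimal sketch of what such a wrapper might look like — the `WatsonXProvider` class name, its settings-only constructor, and its `generate_text` method are assumptions about the provider module, not its verified API:

```python
# Hypothetical Phase 1 wrappers added at the bottom of
# rag_solution/generation/providers/watsonx.py, where the provider class
# (assumed name: WatsonXProvider) is already defined.
from functools import lru_cache

from core.config import get_settings


@lru_cache(maxsize=1)
def _default_provider() -> "WatsonXProvider":
    # Assumed: the provider can be constructed from settings alone,
    # without going through the user-scoped LLMProviderFactory.
    return WatsonXProvider(settings=get_settings())


def generate_text(prompt: str) -> str:
    """User-context-free stand-in for vectordbs.utils.watsonx.generate_text."""
    return _default_provider().generate_text(prompt)
```

With a wrapper like this in place, `llm_as_judge_evals.py`, `chunking.py`, and `query_rewriter.py` could migrate without gaining a dependency on per-user provider configuration.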

### Phase 2: Update Imports

Update all files to import from the provider file instead.
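
For example, `query_rewriter.py` would then switch a single import, assuming the Phase 1 wrapper keeps the legacy function name:

```python
# Before (legacy duplicate):
from vectordbs.utils.watsonx import generate_text

# After (Phase 1 wrapper in the modern provider module):
from rag_solution.generation.providers.watsonx import generate_text
```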

### Phase 3: Remove Duplicate

Delete `backend/vectordbs/utils/watsonx.py` once all imports are migrated.

## Why Keep It For Now?

These are utility functions used in:

- **Evaluation pipelines** (llm_as_judge_evals.py) - Runs outside normal request flow
- **Data ingestion** (chunking.py) - Preprocessing, no user context
- **Query rewriting** (query_rewriter.py) - Utility function

These call sites don't have the user context that the modern LLMProviderFactory pattern requires.

## Recommendation

**DO NOT** remove this file until:

1. Utility wrapper functions are created in the provider
2. All imports are updated and tested
3. Evaluation and ingestion pipelines are verified to work

## Date: 2025-01-25

## Issue: Duplicate watsonx.py files identified during RAG improvements