diff --git a/backend/core/mock_auth.py b/backend/core/mock_auth.py index 024de17f..76e5d017 100644 --- a/backend/core/mock_auth.py +++ b/backend/core/mock_auth.py @@ -13,7 +13,6 @@ from core.config import Settings, get_settings from core.identity_service import IdentityService -from rag_solution.core.exceptions import NotFoundError from rag_solution.schemas.user_schema import UserInput from rag_solution.services.user_service import UserService @@ -107,18 +106,17 @@ def is_bypass_mode_active() -> bool: def ensure_mock_user_exists(db: Session, settings: Settings, user_key: str = "default") -> UUID: # pylint: disable=unused-argument - """ - Ensure a mock user exists with full initialization. + """Ensure a mock user exists using standard user creation flow. - This function uses the UserService to properly create the user - with all required components: - - User record + This function uses the UserService.get_or_create_user() method to maintain + consistency with how OIDC and API users are created. The get_or_create_user() + method automatically handles: + - User record creation/retrieval - Prompt templates (RAG_QUERY, QUESTION_GENERATION, PODCAST_GENERATION) - LLM provider assignment - LLM parameters - Pipeline configuration - - Uses settings for configuration to ensure consistency across the application. + - Defensive reinitialization if defaults are missing Args: db: Database session @@ -127,39 +125,26 @@ def ensure_mock_user_exists(db: Session, settings: Settings, user_key: str = "de Returns: UUID: The user's ID - """ - # Get mock user configuration from settings (not os.getenv directly) - # This ensures consistency with create_mock_user_data() and get_current_user() - config = { - "ibm_id": os.getenv("MOCK_USER_IBM_ID", "mock-user-ibm-id"), # Still use env for IBM ID - "email": settings.mock_user_email, - "name": settings.mock_user_name, - "role": os.getenv("MOCK_USER_ROLE", "admin"), # Still use env for role - } + Note: + This method now uses the same code path as OIDC users (get_or_create_user) + instead of having separate logic for mock users. This ensures consistent + behavior across all authentication methods. + """ try: user_service = UserService(db, settings) - # Try to get existing user first - try: - existing_user = user_service.user_repository.get_by_ibm_id(str(config["ibm_id"])) - logger.debug("Mock user already exists: %s", existing_user.id) - return existing_user.id - except (NotFoundError, ValueError, AttributeError, TypeError): - # User doesn't exist, proceed to create - logger.debug("Mock user not found, will create new user") - - # Create new user with full initialization + # Use standardized user creation flow (same as OIDC/API users) user_input = UserInput( - ibm_id=str(config["ibm_id"]), - email=str(config["email"]), - name=str(config["name"]), - role=str(config["role"]), + ibm_id=os.getenv("MOCK_USER_IBM_ID", "mock-user-ibm-id"), + email=settings.mock_user_email, + name=settings.mock_user_name, + role=os.getenv("MOCK_USER_ROLE", "admin"), ) - logger.info("Creating mock user: %s", config["email"]) - user = user_service.create_user(user_input) - logger.info("Mock user created successfully: %s", user.id) + logger.info("Ensuring mock user exists: %s", user_input.email) + user = user_service.get_or_create_user(user_input) + logger.info("Mock user ready: %s", user.id) return user.id diff --git a/backend/rag_solution/services/user_service.py b/backend/rag_solution/services/user_service.py index ef95a56b..0f06de5a 100644 --- a/backend/rag_solution/services/user_service.py +++ b/backend/rag_solution/services/user_service.py @@ -8,10 +8,15 @@ from rag_solution.core.exceptions import NotFoundError, ValidationError from rag_solution.repository.user_repository import UserRepository from rag_solution.schemas.user_schema import UserInput, UserOutput +from rag_solution.services.prompt_template_service import PromptTemplateService from rag_solution.services.user_provider_service import UserProviderService logger = get_logger(__name__) +# Minimum number of required templates for user initialization +# Includes: RAG_QUERY, QUESTION_GENERATION, PODCAST_GENERATION +MIN_REQUIRED_TEMPLATES = 3 + class UserService: """Service for managing user-related operations.""" @@ -22,6 +27,7 @@ def __init__(self: Any, db: Session, settings: Settings) -> None: self.settings = settings self.user_repository = UserRepository(db) self.user_provider_service = UserProviderService(db, settings) + self.prompt_template_service = PromptTemplateService(db) def create_user(self, user_input: UserInput) -> UserOutput: """Creates a new user with validation. @@ -37,7 +43,7 @@ def create_user(self, user_input: UserInput) -> UserOutput: provider, templates, parameters = self.user_provider_service.initialize_user_defaults(user.id) # Validate that all required defaults were created (RAG, Question, Podcast) - if not provider or not templates or len(templates) < 3 or not parameters: + if not provider or not templates or len(templates) < MIN_REQUIRED_TEMPLATES or not parameters: self.db.rollback() raise ValidationError("Failed to initialize required user configuration") @@ -51,11 +57,57 @@ def get_or_create_user_by_fields(self, ibm_id: str, email: EmailStr, name: str, ) def get_or_create_user(self, user_input: UserInput) -> UserOutput: - """Gets existing user or creates new one from input model.""" + """Gets existing user or creates new one, ensuring all required defaults exist. + + This method provides defensive initialization to handle edge cases where users + may exist in the database but are missing required defaults (e.g., after database + wipes, failed initializations, or data migrations). + + Args: + user_input: User data for creation or lookup + + Returns: + UserOutput: User with all required defaults initialized + + Note: + Automatically reinitializes missing defaults (templates, parameters, pipelines) + for existing users. This adds one DB query per user access but prevents + silent failures during collection creation or search operations. + """ try: - return self.user_repository.get_by_ibm_id(user_input.ibm_id) + existing_user = self.user_repository.get_by_ibm_id(user_input.ibm_id) + + # Defensive check: Ensure user has required defaults + # Handles edge case where user exists after DB wipe but missing defaults + templates = self.prompt_template_service.get_user_templates(existing_user.id) + + if not templates or len(templates) < MIN_REQUIRED_TEMPLATES: + logger.warning( + "User %s exists but missing defaults (has %d/%d templates) - attempting recovery...", + existing_user.id, + len(templates) if templates else 0, + MIN_REQUIRED_TEMPLATES, + ) + try: + _, reinit_templates, parameters = self.user_provider_service.initialize_user_defaults( + existing_user.id + ) + logger.info( + "✅ Successfully recovered user %s: %d templates, %s parameters", + existing_user.id, + len(reinit_templates), + "created" if parameters else "failed", + ) + except Exception as e: + logger.error("❌ Failed to recover user %s: %s", existing_user.id, str(e)) + raise ValidationError( + f"User {existing_user.id} missing required defaults and recovery failed: {e}", + field="user_initialization", + ) from e + + return existing_user except NotFoundError: - # User doesn't exist, create a new one + # User doesn't exist, create with full initialization return self.create_user(user_input) def get_user_by_id(self, user_id: UUID4) -> UserOutput: diff --git a/backend/tests/integration/test_user_database.py b/backend/tests/integration/test_user_database.py index ef305cd3..1a5886a1 100644 --- a/backend/tests/integration/test_user_database.py +++ b/backend/tests/integration/test_user_database.py @@ -1,8 +1,12 @@ """ -Simplified version of test_user_database.py +Integration tests for user initialization and recovery after database operations. """ import pytest +from sqlalchemy import text + +from core.mock_auth import ensure_mock_user_exists +from rag_solution.services.prompt_template_service import PromptTemplateService @pytest.mark.integration @@ -22,3 +26,43 @@ def test_mock_services(self, mock_watsonx_provider): """Test mock services.""" assert mock_watsonx_provider is not None assert hasattr(mock_watsonx_provider, "generate_response") + + +@pytest.mark.integration +class TestUserInitializationRecovery: + """Integration tests for user initialization recovery after database wipes.""" + + def test_mock_user_initialization_after_db_wipe(self, db_session, integration_settings): + """Integration test: Mock user gets defaults even after DB wipe simulating template deletion.""" + # Create mock user with full initialization + user_id = ensure_mock_user_exists(db_session, integration_settings) + + # Verify templates exist + template_service = PromptTemplateService(db_session) + templates_before = template_service.get_user_templates(user_id) + assert len(templates_before) >= 3, f"Expected at least 3 templates, got {len(templates_before)}" + + # Simulate DB wipe (delete templates but keep user) + # This simulates what happens after scripts/wipe_database.py + db_session.execute(text("DELETE FROM prompt_templates WHERE user_id = :uid"), {"uid": str(user_id)}) + db_session.commit() + + # Verify templates were deleted + templates_after_wipe = template_service.get_user_templates(user_id) + assert len(templates_after_wipe) == 0, "Templates should be deleted after simulated wipe" + + # Call ensure_mock_user_exists again - should trigger defensive initialization + recovered_user_id = ensure_mock_user_exists(db_session, integration_settings) + assert recovered_user_id == user_id, "Should return same user ID" + + # Verify templates were recreated by defensive initialization + templates_after_recovery = template_service.get_user_templates(user_id) + assert len(templates_after_recovery) >= 3, ( + f"Expected at least 3 templates after recovery, got {len(templates_after_recovery)}" + ) + + # Verify we have all required template types + template_types = {t.template_type for t in templates_after_recovery} + assert "RAG_QUERY" in template_types, "Missing RAG_QUERY template" + assert "QUESTION_GENERATION" in template_types, "Missing QUESTION_GENERATION template" + assert "PODCAST_GENERATION" in template_types, "Missing PODCAST_GENERATION template" diff --git a/backend/tests/unit/test_user_service_tdd.py b/backend/tests/unit/test_user_service_tdd.py index 6f38fc44..d6f0f264 100644 --- a/backend/tests/unit/test_user_service_tdd.py +++ b/backend/tests/unit/test_user_service_tdd.py @@ -42,10 +42,12 @@ def service(self, mock_db, mock_settings): with ( patch("rag_solution.services.user_service.UserRepository"), patch("rag_solution.services.user_service.UserProviderService"), + patch("rag_solution.services.user_service.PromptTemplateService"), ): service = UserService(mock_db, mock_settings) service.user_repository = Mock() service.user_provider_service = Mock() + service.prompt_template_service = Mock() return service def test_create_user_success_red_phase(self, service, mock_db): @@ -151,7 +153,7 @@ def test_create_user_insufficient_templates_red_phase(self, service, mock_db): mock_db.rollback.assert_called_once() def test_get_or_create_user_existing_user_red_phase(self, service): - """RED: Test get_or_create when user already exists.""" + """RED: Test get_or_create when user already exists with sufficient templates.""" user_input = UserInput( ibm_id="existing_user", email="existing@example.com", @@ -172,11 +174,15 @@ def test_get_or_create_user_existing_user_red_phase(self, service): ) service.user_repository.get_by_ibm_id.return_value = existing_user + # Mock that user has 3 templates (sufficient) + service.prompt_template_service.get_user_templates.return_value = [Mock(), Mock(), Mock()] result = service.get_or_create_user(user_input) assert result is existing_user service.user_repository.get_by_ibm_id.assert_called_once_with("existing_user") + service.prompt_template_service.get_user_templates.assert_called_once_with(existing_user.id) + service.user_provider_service.initialize_user_defaults.assert_not_called() service.user_repository.create.assert_not_called() def test_get_or_create_user_new_user_red_phase(self, service, mock_db): # noqa: ARG002 @@ -211,6 +217,69 @@ def test_get_or_create_user_new_user_red_phase(self, service, mock_db): # noqa: service.user_repository.get_by_ibm_id.assert_called_once_with("new_user") service.user_repository.create.assert_called_once_with(user_input) + def test_get_or_create_user_missing_templates_reinitializes(self, service): + """Test that existing user with missing templates triggers reinitialization.""" + user_input = UserInput( + ibm_id="user1", email="user@test.com", name="User", role="user", preferred_provider_id=None + ) + user_id = uuid4() + existing_user = UserOutput( + id=user_id, + ibm_id="user1", + email="user@test.com", + name="User", + role="user", + preferred_provider_id=None, + created_at="2024-01-01T00:00:00Z", + updated_at="2024-01-01T00:00:00Z", + ) + + # Mock existing user but with < 3 templates + service.user_repository.get_by_ibm_id.return_value = existing_user + service.prompt_template_service.get_user_templates.return_value = [Mock()] # Only 1 template + + # Mock successful reinitialization + service.user_provider_service.initialize_user_defaults.return_value = ( + Mock(), # provider + [Mock(), Mock(), Mock()], # 3 templates + Mock(), # parameters + ) + + result = service.get_or_create_user(user_input) + + # Assert reinitialization was triggered + assert result is existing_user + service.prompt_template_service.get_user_templates.assert_called_once_with(existing_user.id) + service.user_provider_service.initialize_user_defaults.assert_called_once_with(existing_user.id) + + def test_get_or_create_user_with_sufficient_templates_skips_reinit(self, service): + """Test that existing user with 3+ templates skips reinitialization.""" + user_input = UserInput( + ibm_id="user1", email="user@test.com", name="User", role="user", preferred_provider_id=None + ) + user_id = uuid4() + existing_user = UserOutput( + id=user_id, + ibm_id="user1", + email="user@test.com", + name="User", + role="user", + preferred_provider_id=None, + created_at="2024-01-01T00:00:00Z", + updated_at="2024-01-01T00:00:00Z", + ) + + service.user_repository.get_by_ibm_id.return_value = existing_user + # User has 3 templates - sufficient + service.prompt_template_service.get_user_templates.return_value = [Mock(), Mock(), Mock()] + + result = service.get_or_create_user(user_input) + + # Assert reinitialization was NOT triggered + assert result is existing_user + service.prompt_template_service.get_user_templates.assert_called_once_with(existing_user.id) + service.user_provider_service.initialize_user_defaults.assert_not_called() + def test_get_or_create_user_by_fields_red_phase(self, service): """RED: Test get_or_create_user_by_fields convenience method.""" existing_user = UserOutput( @@ -225,6 +294,8 @@ def test_get_or_create_user_by_fields_red_phase(self, service): ) service.user_repository.get_by_ibm_id.return_value = existing_user + # Mock that user has 3 templates (sufficient) + service.prompt_template_service.get_user_templates.return_value = [Mock(), Mock(), Mock()] result = service.get_or_create_user_by_fields( ibm_id="field_user", email="field@example.com", name="Field User", role="admin" diff --git a/docs/development/user-initialization-architecture.md b/docs/development/user-initialization-architecture.md new file mode 100644 index 00000000..b108b797 --- /dev/null +++ b/docs/development/user-initialization-architecture.md @@ -0,0 +1,400 @@ +# User Initialization Architecture + +## Overview + +This document describes the unified user initialization architecture that ensures all users (mock, OIDC, API) receive proper defaults regardless of authentication method. + +## Problem Statement + +**Before:** Different code paths for mock vs. OIDC users led to: + +- Code duplication +- Inconsistent behavior +- Silent failures after database wipes +- Users missing prompt templates, parameters, or pipelines + +**After:** Single unified code path ensures consistency and defensive initialization. + +## Architecture Design + +### Core Principle + +**All user creation flows converge to `UserService.get_or_create_user()`** which guarantees proper initialization for both new and existing users. + +### Key Components + +``` +┌─────────────────────────────────────────────────────┐ +│ User Creation Flow │ +└─────────────────────────────────────────────────────┘ + │ + ┌───────────────┼───────────────┐ + │ │ │ + ┌────────┐ ┌─────────┐ ┌──────────┐ + │ Mock │ │ OIDC │ │ API │ + │ User │ │ User │ │ User │ + └────────┘ └─────────┘ └──────────┘ + │ │ │ + └───────────────┼───────────────┘ + ↓ + ┌─────────────────────────────────────┐ + │ UserService.get_or_create_user() │ + │ - Single source of truth │ + │ - Defensive initialization │ + │ - Self-healing for missing defaults │ + └─────────────────────────────────────┘ +``` + +## Implementation + +### 1. `UserService.get_or_create_user()` - The Core Method + +**Location:** `backend/rag_solution/services/user_service.py` + +```python +def get_or_create_user(self, user_input: UserInput) -> UserOutput: + """Gets existing user or creates new one, ensuring all required defaults exist. + + Provides defensive initialization to handle edge cases where users may exist + in the database but are missing required defaults (e.g., after database wipes, + failed initializations, or data migrations). + """ + try: + existing_user = self.user_repository.get_by_ibm_id(user_input.ibm_id) + + # Defensive check: Ensure user has required defaults + template_service = PromptTemplateService(self.db) + templates = template_service.get_user_templates(existing_user.id) + + if not templates or len(templates) < 3: + logger.warning( + "User %s exists but missing defaults (has %d templates) - reinitializing...", + existing_user.id, + len(templates) if templates else 0, + ) + user_provider_service = UserProviderService(self.db, self.settings) + _, reinit_templates, parameters = user_provider_service.initialize_user_defaults( + existing_user.id + ) + logger.info( + "Reinitialized user %s defaults: %d templates, %s parameters", + existing_user.id, + len(reinit_templates), + "created" if parameters else "failed", + ) + + return existing_user + except NotFoundError: + # User doesn't exist, create with full initialization + return self.create_user(user_input) +``` + +**Key Features:** + +- ✅ **Defensive initialization**: Checks for missing defaults +- ✅ **Self-healing**: Automatically reinitializes if needed +- ✅ **Database wipe safe**: Handles users without templates +- ✅ **Unified path**: Same for all authentication methods + +### 2. Mock User Initialization - Simplified + +**Location:** `backend/core/mock_auth.py` + +**Before (70+ lines):** + +```python +def ensure_mock_user_exists(db, settings): + # Custom user lookup + # Custom template check + # Custom initialization logic + # Lots of duplicate code + # Different from OIDC flow +``` + +**After (20 lines):** + +```python +def ensure_mock_user_exists(db, settings, user_key="default") -> UUID: + """Ensure mock user exists using standard user creation flow.""" + try: + user_service = UserService(db, settings) + + # Use standardized user creation flow (same as OIDC/API users) + user_input = UserInput( + ibm_id=os.getenv("MOCK_USER_IBM_ID", "mock-user-ibm-id"), + email=settings.mock_user_email, + name=settings.mock_user_name, + role=os.getenv("MOCK_USER_ROLE", "admin"), + ) + + logger.info("Ensuring mock user exists: %s", user_input.email) + user = user_service.get_or_create_user(user_input) + logger.info("Mock user ready: %s", user.id) + + return user.id + + except (ValueError, KeyError, AttributeError) as e: + logger.error("Failed to ensure mock user exists: %s", str(e)) + return IdentityService.get_mock_user_id() +``` + +**Improvements:** + +- ✅ **Removed ~50 lines** of duplicate code +- ✅ **Same code path** as OIDC users +- ✅ **Leverages** defensive initialization from `get_or_create_user()` +- ✅ **Simpler** and easier to maintain + +### 3. User Defaults Initialization + +**What gets initialized:** + +1. **Prompt Templates** (3 required): + - `RAG_QUERY` - For answering questions with RAG + - `QUESTION_GENERATION` - For generating suggested questions + - `PODCAST_GENERATION` - For creating podcast scripts + +2. **LLM Parameters:** + - `temperature` - Controls randomness (default: 0.7) + - `max_new_tokens` - Maximum response length (default: 2048) + - `top_p`, `top_k` - Sampling parameters + +3. **Pipeline Configuration:** + - Links templates and parameters + - Sets default search/generation behavior + +4. **Provider Assignment:** + - Links user to default LLM provider (WatsonX, OpenAI, or Anthropic) + +**Method:** `UserProviderService.initialize_user_defaults()` + +**Location:** `backend/rag_solution/services/user_provider_service.py` + +## User Creation Flows + +### All Authentication Methods + +| Authentication Type | Entry Point | Flow Path | +|-------------------|------------|-----------| +| **Mock User** (SKIP_AUTH=true) | `ensure_mock_user_exists()` | → `get_or_create_user()` → defensive check ✅ | +| **OIDC User** (First login) | OIDC callback handler | → `get_or_create_user()` → `create_user()` ✅ | +| **OIDC User** (Returning) | OIDC callback handler | → `get_or_create_user()` → defensive check ✅ | +| **API User** (Admin creates) | `POST /api/users/` | → `create_user()` ✅ | + +**Result:** All users get proper initialization, regardless of how they're created! 🎉 + +## Edge Cases Handled + +### 1. Database Wipe Scenario + +**Problem:** User exists but prompt_templates table is empty + +**Solution:** + +```python +# get_or_create_user() checks template count +if not templates or len(templates) < 3: + # Reinitialize defaults + user_provider_service.initialize_user_defaults(user_id) +``` + +**Result:** Templates automatically recreated on next login/startup + +### 2. Failed Initialization + +**Problem:** User created but template creation failed partway + +**Solution:** + +```python +# Defensive check catches incomplete initialization +if not templates or len(templates) < 3: + # Complete the initialization + user_provider_service.initialize_user_defaults(user_id) +``` + +**Result:** Self-healing - fixes itself on next access + +### 3. Data Migration + +**Problem:** Upgraded from version without podcast templates + +**Solution:** + +```python +# Check for minimum template count +if len(templates) < 3: + # Add missing templates + user_provider_service.initialize_user_defaults(user_id) +``` + +**Result:** Automatic migration to latest schema + +## Benefits + +### Code Quality + +- ✅ **DRY (Don't Repeat Yourself):** Single code path for all users +- ✅ **Maintainability:** Fix once, works for all auth methods +- ✅ **Testability:** Easier to test single flow +- ✅ **Readability:** Clear, simple logic + +### Reliability + +- ✅ **Consistency:** All users behave identically +- ✅ **Self-healing:** Automatically fixes missing defaults +- ✅ **Database wipe safe:** Recreates defaults on startup +- ✅ **Migration friendly:** Handles schema changes gracefully + +### Developer Experience + +- ✅ **No special cases:** Mock users = OIDC users = API users +- ✅ **Predictable:** Always know what to expect +- ✅ **Debuggable:** Single code path to follow +- ✅ **Documented:** Clear architecture pattern + +## Testing + +### Verification Steps + +1. **Check template count** after user creation: + + ```sql + SELECT COUNT(*) FROM prompt_templates WHERE user_id = ''; + -- Expected: 3 + ``` + +2. **Verify template types** exist: + + ```sql + SELECT template_type FROM prompt_templates WHERE user_id = ''; + -- Expected: RAG_QUERY, QUESTION_GENERATION, PODCAST_GENERATION + ``` + +3. **Check LLM parameters** exist: + + ```sql + SELECT id FROM llm_parameters WHERE user_id = ''; + -- Expected: 1 row + ``` + +4. **Verify pipeline** exists: + + ```sql + SELECT id FROM pipeline_configs WHERE user_id = ''; + -- Expected: 1 row + ``` + +### Test Scenarios + +#### Scenario 1: Mock User (First Time) + +```bash +# Clean database +python scripts/wipe_database.py --backup + +# Start backend (SKIP_AUTH=true) +make local-dev-backend + +# Verify +psql -d rag_modulo -c "SELECT COUNT(*) FROM prompt_templates;" +# Expected: 3 templates created +``` + +#### Scenario 2: Mock User (After Wipe) + +```bash +# User exists but templates wiped +# Start backend +make local-dev-backend + +# Verify defensive initialization triggered +# Check logs for: "User ... exists but missing defaults - reinitializing..." + +# Verify templates recreated +psql -d rag_modulo -c "SELECT COUNT(*) FROM prompt_templates;" +# Expected: 3 templates +``` + +#### Scenario 3: OIDC User (First Login) + +```bash +# Set SKIP_AUTH=false +# Login via IBM OIDC + +# Verify user created with defaults +psql -d rag_modulo -c "SELECT COUNT(*) FROM prompt_templates WHERE user_id = '';" +# Expected: 3 templates +``` + +## Migration Guide + +### From Old Architecture + +If you're upgrading from the old architecture with separate mock user logic: + +1. **No code changes needed** in application code +2. **Database schema** unchanged +3. **Environment variables** unchanged +4. **Existing users** will be self-healed on next access + +### Rollback Plan + +If issues arise: + +1. **Code rollback:** Revert to previous commit +2. **Database:** No schema changes, safe to rollback code +3. **Users:** Will continue working (old code compatible) + +## Performance Considerations + +### Cost: One Extra Query Per User Access + +```python +# Added check in get_or_create_user() +templates = template_service.get_user_templates(existing_user.id) +``` + +**Impact:** + +- **Query:** Simple SELECT with indexed user_id +- **Frequency:** Once per user login/access +- **Cost:** ~1-5ms (negligible) + +**Benefits outweigh cost:** + +- ✅ Prevents silent failures (hours of debugging) +- ✅ Self-healing (no manual intervention) +- ✅ Database wipe safe (automatic recovery) + +### Caching Opportunity (Future) + +Could cache template count per user to reduce queries: + +```python +# Future optimization +if not cache.get(f"user:{user_id}:templates_ok"): + templates = template_service.get_user_templates(user_id) + if len(templates) >= 3: + cache.set(f"user:{user_id}:templates_ok", True, ttl=3600) +``` + +## Related Documentation + +- [Authentication Bypass Architecture](../features/authentication-bypass.md) - Mock authentication +- [Service Layer Design](./service-layer.md) - Service architecture patterns +- [Database Management Scripts](../../scripts/README.md) - Wipe/restore procedures + +## References + +- **Primary Files:** + - `backend/rag_solution/services/user_service.py:53-103` + - `backend/core/mock_auth.py:109-155` + - `backend/rag_solution/services/user_provider_service.py:34-76` + +- **Related Issues:** + - GitHub #483: Enhanced health check for user defaults + - Original bug: Missing templates after database wipe + +- **Pull Requests:** + - Refactor: Unified user initialization architecture diff --git a/docs/features/authentication-bypass.md b/docs/features/authentication-bypass.md index e4215f1d..0fbcd824 100644 --- a/docs/features/authentication-bypass.md +++ b/docs/features/authentication-bypass.md @@ -124,6 +124,110 @@ When `SKIP_AUTH=false`: └─────────────┘ ``` +## User Initialization Architecture + +### Unified User Creation (All Authentication Methods) + +**Design Principle:** All users (mock, OIDC, API) use the same code path for initialization to ensure consistency and prevent silent failures. + +#### Single Source: `UserService.get_or_create_user()` + +All user creation flows converge to a single method that guarantees proper initialization: + +```python +# backend/rag_solution/services/user_service.py + +def get_or_create_user(self, user_input: UserInput) -> UserOutput: + """Gets existing user or creates new one, ensuring all required defaults exist. + + Provides defensive initialization to handle edge cases where users may exist + in the database but are missing required defaults (e.g., after database wipes, + failed initializations, or data migrations). + """ + try: + existing_user = self.user_repository.get_by_ibm_id(user_input.ibm_id) + + # Defensive check: Ensure user has required defaults + template_service = PromptTemplateService(self.db) + templates = template_service.get_user_templates(existing_user.id) + + if not templates or len(templates) < 3: + # User missing defaults - reinitialize + user_provider_service.initialize_user_defaults(existing_user.id) + + return existing_user + except NotFoundError: + # User doesn't exist, create with full initialization + return self.create_user(user_input) +``` + +**What gets initialized:** + +1. **Prompt Templates** (3 required): + - `RAG_QUERY` - For answering questions + - `QUESTION_GENERATION` - For generating suggested questions + - `PODCAST_GENERATION` - For creating podcast scripts + +2. **LLM Parameters:** + - Default generation settings (temperature, max_tokens, etc.) + +3. **Pipeline Configuration:** + - Default RAG pipeline linking templates and parameters + +4. **Provider Assignment:** + - Links user to default LLM provider (usually WatsonX) + +#### Mock User Creation (Simplified) + +**Before (Special Case):** + +```python +# Old approach - separate logic for mock users +def ensure_mock_user_exists(db, settings): + # Custom check for existing user + # Custom template initialization + # Lots of duplicate code + # Different behavior than OIDC users +``` + +**After (Unified):** + +```python +# New approach - uses same path as OIDC users +def ensure_mock_user_exists(db, settings): + """Ensure mock user exists using standard user creation flow.""" + user_service = UserService(db, settings) + user_input = UserInput( + ibm_id=os.getenv("MOCK_USER_IBM_ID", "mock-user-ibm-id"), + email=settings.mock_user_email, + name=settings.mock_user_name, + role=os.getenv("MOCK_USER_ROLE", "admin"), + ) + + # Same code path as OIDC users! + user = user_service.get_or_create_user(user_input) + return user.id +``` + +**Benefits:** + +- ✅ **Consistency**: Mock and OIDC users behave identically +- ✅ **Less code**: Removed ~50 lines of duplicate logic +- ✅ **Self-healing**: Automatically fixes users missing defaults +- ✅ **Database wipe safe**: Templates recreated on next startup +- ✅ **Better tested**: Single code path = easier to test + +#### All User Creation Paths + +| User Type | Entry Point | Flow | +|-----------|------------|------| +| **Mock User** (SKIP_AUTH=true) | `ensure_mock_user_exists()` | → `get_or_create_user()` → defensive check | +| **OIDC User** (First login) | OIDC callback | → `get_or_create_user()` → `create_user()` | +| **OIDC User** (Returning) | OIDC callback | → `get_or_create_user()` → defensive check | +| **API User** (Admin creates) | `POST /api/users/` | → `create_user()` | + +**Result:** All users get proper initialization, regardless of authentication method! 🎉 + ## Implementation Details ### Backend Components