diff --git a/app/api/endpoints/tokens.py b/app/api/endpoints/tokens.py index ca0ced7..299b926 100644 --- a/app/api/endpoints/tokens.py +++ b/app/api/endpoints/tokens.py @@ -21,6 +21,8 @@ class TokenRequest(BaseModel): catalogs: list[CatalogConfig] | None = Field(default=None, description="Optional catalog configuration") language: str = Field(default="en-US", description="Language for TMDB API") rpdb_key: str | None = Field(default=None, description="Optional RPDB API Key") + excluded_movie_genres: list[str] = Field(default_factory=list, description="List of movie genre IDs to exclude") + excluded_series_genres: list[str] = Field(default_factory=list, description="List of series genre IDs to exclude") class TokenResponse(BaseModel): @@ -130,6 +132,8 @@ async def create_token(payload: TokenRequest, request: Request) -> TokenResponse language=payload.language or default_settings.language, catalogs=payload.catalogs if payload.catalogs else default_settings.catalogs, rpdb_key=rpdb_key, + excluded_movie_genres=payload.excluded_movie_genres, + excluded_series_genres=payload.excluded_series_genres, ) # encode_settings now includes the "settings:" prefix diff --git a/app/core/settings.py b/app/core/settings.py index 38b4c4e..924d599 100644 --- a/app/core/settings.py +++ b/app/core/settings.py @@ -14,6 +14,8 @@ class UserSettings(BaseModel): catalogs: list[CatalogConfig] language: str = "en-US" rpdb_key: str | None = None + excluded_movie_genres: list[str] = [] + excluded_series_genres: list[str] = [] def encode_settings(settings: UserSettings) -> str: diff --git a/app/services/catalog.py b/app/services/catalog.py index 58eae4e..c9f1334 100644 --- a/app/services/catalog.py +++ b/app/services/catalog.py @@ -45,7 +45,9 @@ def build_catalog_entry(self, item, label, config_id): "extra": [], } - async def get_theme_based_catalogs(self, library_items: list[dict]) -> list[dict]: + async def get_theme_based_catalogs( + self, library_items: list[dict], user_settings: UserSettings | 
None = None + ) -> list[dict]: catalogs = [] # 1. Build User Profile # Combine loved and watched @@ -65,16 +67,27 @@ async def get_theme_based_catalogs(self, library_items: list[dict]) -> list[dict scored_obj = self.scoring_service.process_item(item_data) scored_objects.append(scored_obj) + # Get excluded genres + excluded_movie_genres = [] + excluded_series_genres = [] + if user_settings: + excluded_movie_genres = [int(g) for g in user_settings.excluded_movie_genres] + excluded_series_genres = [int(g) for g in user_settings.excluded_series_genres] + # 2. Generate Thematic Rows with Type-Specific Profiles # Generate for Movies - movie_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type="movie") + movie_profile = await self.user_profile_service.build_user_profile( + scored_objects, content_type="movie", excluded_genres=excluded_movie_genres + ) movie_rows = await self.row_generator.generate_rows(movie_profile, "movie") for row in movie_rows: catalogs.append({"type": "movie", "id": row.id, "name": row.title, "extra": []}) # Generate for Series - series_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type="series") + series_profile = await self.user_profile_service.build_user_profile( + scored_objects, content_type="series", excluded_genres=excluded_series_genres + ) series_rows = await self.row_generator.generate_rows(series_profile, "series") for row in series_rows: @@ -98,7 +111,7 @@ async def get_dynamic_catalogs( catalogs = [] if include_theme_based_rows: - catalogs.extend(await self.get_theme_based_catalogs(library_items)) + catalogs.extend(await self.get_theme_based_catalogs(library_items, user_settings)) # 3. 
Add Item-Based Rows if include_item_based_rows: diff --git a/app/services/discovery.py b/app/services/discovery.py index fd892aa..35d1c3d 100644 --- a/app/services/discovery.py +++ b/app/services/discovery.py @@ -14,7 +14,11 @@ def __init__(self): self.tmdb_service = TMDBService() async def discover_recommendations( - self, profile: UserTasteProfile, content_type: str, limit: int = 20 + self, + profile: UserTasteProfile, + content_type: str, + limit: int = 20, + excluded_genres: list[int] | None = None, ) -> list[dict]: """ Find content that matches the user's taste profile. @@ -33,17 +37,26 @@ async def discover_recommendations( top_crew = profile.get_top_crew(limit=1) # e.g. [(555, 1.0)] - Director top_countries = profile.get_top_countries(limit=2) + top_year = profile.get_top_year(limit=1) if not top_genres and not top_keywords and not top_cast: # Fallback if profile is empty return [] tasks = [] + base_params = {} + if excluded_genres: + base_params["without_genres"] = "|".join([str(g) for g in excluded_genres]) # Query 1: Top Genres Mix if top_genres: genre_ids = "|".join([str(g[0]) for g in top_genres]) - params_popular = {"with_genres": genre_ids, "sort_by": "popularity.desc", "vote_count.gte": 100} + params_popular = { + "with_genres": genre_ids, + "sort_by": "popularity.desc", + "vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_popular)) # fetch atleast two pages of results @@ -51,15 +64,21 @@ async def discover_recommendations( params_rating = { "with_genres": genre_ids, "sort_by": "ratings.desc", - "vote_count.gte": 300, + "vote_count.gte": 500, "page": i + 1, + **base_params, } tasks.append(self._fetch_discovery(content_type, params_rating)) # Query 2: Top Keywords if top_keywords: keyword_ids = "|".join([str(k[0]) for k in top_keywords]) - params_keywords = {"with_keywords": keyword_ids, "sort_by": "popularity.desc"} + params_keywords = { + "with_keywords": keyword_ids, + "sort_by": "popularity.desc", + 
"vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_keywords)) # fetch atleast two pages of results @@ -67,18 +86,29 @@ async def discover_recommendations( params_rating = { "with_keywords": keyword_ids, "sort_by": "ratings.desc", - "vote_count.gte": 300, + "vote_count.gte": 500, "page": i + 1, + **base_params, } tasks.append(self._fetch_discovery(content_type, params_rating)) # Query 3: Top Actors for actor in top_cast: actor_id = actor[0] - params_actor = {"with_cast": str(actor_id), "sort_by": "popularity.desc"} + params_actor = { + "with_cast": str(actor_id), + "sort_by": "popularity.desc", + "vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_actor)) - params_rating = {"with_cast": str(actor_id), "sort_by": "ratings.desc", "vote_count.gte": 300} + params_rating = { + "with_cast": str(actor_id), + "sort_by": "ratings.desc", + "vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_rating)) # Query 4: Top Director @@ -87,19 +117,47 @@ async def discover_recommendations( params_director = { "with_crew": str(director_id), "sort_by": "vote_average.desc", # Directors imply quality preference + "vote_count.gte": 500, + **base_params, } tasks.append(self._fetch_discovery(content_type, params_director)) - params_rating = {"with_crew": str(director_id), "sort_by": "ratings.desc", "vote_count.gte": 300} + params_rating = { + "with_crew": str(director_id), + "sort_by": "ratings.desc", + "vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_rating)) # Query 5: Top Countries if top_countries: country_ids = "|".join([str(c[0]) for c in top_countries]) - params_country = {"with_origin_country": country_ids, "sort_by": "popularity.desc", "vote_count.gte": 100} + params_country = { + "with_origin_country": country_ids, + "sort_by": "popularity.desc", + "vote_count.gte": 100, + **base_params, + } 
tasks.append(self._fetch_discovery(content_type, params_country)) - params_rating = {"with_origin_country": country_ids, "sort_by": "ratings.desc", "vote_count.gte": 300} + params_rating = { + "with_origin_country": country_ids, + "sort_by": "ratings.desc", + "vote_count.gte": 300, + **base_params, + } + tasks.append(self._fetch_discovery(content_type, params_rating)) + + # query 6: Top year + if top_year: + year = top_year[0][0] + params_rating = { + "year": year, + "sort_by": "ratings.desc", + "vote_count.gte": 500, + **base_params, + } tasks.append(self._fetch_discovery(content_type, params_rating)) # 3. Execute Parallel Queries diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py index 7f929ec..e54220b 100644 --- a/app/services/recommendation_service.py +++ b/app/services/recommendation_service.py @@ -254,6 +254,15 @@ async def get_recommendations_for_item(self, item_id: str) -> list[dict]: # 1. Filter by TMDB ID recommendations = await self._filter_candidates(recommendations, watched_imdb, watched_tmdb) + # 1.5 Filter by Excluded Genres + # We need to detect content_type from item_id or media_type to know which exclusion list to use. + # media_type is already resolved above. + excluded_ids = set(self._get_excluded_genre_ids(media_type)) + if excluded_ids: + recommendations = [ + item for item in recommendations if not excluded_ids.intersection(item.get("genre_ids") or []) + ] + # 2. 
def _get_excluded_genre_ids(self, content_type: str) -> list[int]:
    """Return the user's excluded genre IDs for ``content_type`` as integers.

    The settings store genre IDs as strings; callers compare them against
    integer ``genre_ids`` on candidate items and pass them to discovery
    queries, so they are converted here.

    Args:
        content_type: ``"movie"`` selects ``excluded_movie_genres``;
            ``"series"`` or ``"tv"`` selects ``excluded_series_genres``.
            Any other value yields an empty list.

    Returns:
        The parsed genre IDs in their original order. Returns an empty list
        when no user settings are attached. Entries that cannot be parsed as
        integers are skipped (and logged) instead of raising, so a single
        malformed value in stored settings does not break recommendations.
    """
    import logging  # local import keeps the helper self-contained

    settings = self.user_settings
    if not settings:
        return []

    if content_type == "movie":
        raw_ids = settings.excluded_movie_genres
    elif content_type in ("series", "tv"):
        raw_ids = settings.excluded_series_genres
    else:
        return []

    genre_ids: list[int] = []
    for raw in raw_ids or []:
        try:
            genre_ids.append(int(raw))
        except (TypeError, ValueError):
            # Settings come from a user-supplied payload; tolerate bad
            # entries rather than failing the whole request.
            logging.getLogger(__name__).warning("Ignoring invalid excluded genre id: %r", raw)
    return genre_ids
+ params["without_genres"] = "|".join(str(g) for g in excluded_ids) + # Fetch recommendations = await self.tmdb_service.get_discover(content_type, **params) candidates = recommendations.get("results", []) @@ -407,15 +435,25 @@ async def get_recommendations( tasks_a.append(self._fetch_recommendations_from_tmdb(source.get("_id"), source.get("type"), limit=10)) similarity_candidates = [] similarity_recommendations = await asyncio.gather(*tasks_a, return_exceptions=True) + + excluded_ids = set(self._get_excluded_genre_ids(content_type)) + similarity_recommendations = [item for item in similarity_recommendations if not isinstance(item, Exception)] - for item in similarity_recommendations: - similarity_candidates.extend(item) + for batch in similarity_recommendations: + similarity_candidates.extend( + item for item in batch if not excluded_ids.intersection(item.get("genre_ids") or []) + ) # --- Candidate Set B: Profile-based Discovery --- + # Extract excluded genres + excluded_genres = list(excluded_ids) # Convert back to list for consistency + # Use typed profile based on content_type - user_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type=content_type) + user_profile = await self.user_profile_service.build_user_profile( + scored_objects, content_type=content_type, excluded_genres=excluded_genres + ) discovery_candidates = await self.discovery_engine.discover_recommendations( - user_profile, content_type, limit=20 + user_profile, content_type, limit=20, excluded_genres=excluded_genres ) # --- Combine & Deduplicate --- diff --git a/app/services/user_profile.py b/app/services/user_profile.py index 6495136..3d21ff8 100644 --- a/app/services/user_profile.py +++ b/app/services/user_profile.py @@ -38,7 +38,10 @@ def __init__(self): self.tmdb_service = TMDBService() async def build_user_profile( - self, scored_items: list[ScoredItem], content_type: str | None = None + self, + scored_items: list[ScoredItem], + content_type: str | None = 
None, + excluded_genres: list[int] | None = None, ) -> UserTasteProfile: """ Aggregates multiple item vectors into a single User Taste Profile. @@ -76,7 +79,7 @@ async def build_user_profile( # Scale by Interest Score (0.0 - 1.0) interest_weight = item.score / 100.0 - self._merge_vector(profile_data, item_vector, interest_weight) + self._merge_vector(profile_data, item_vector, interest_weight, excluded_genres) # Convert to Pydantic Model profile = UserTasteProfile( @@ -206,7 +209,13 @@ def _vectorize_item(self, meta: dict) -> dict[str, list[int] | int | list[str] | return vector - def _merge_vector(self, profile: dict, item_vector: dict, weight: float): + def _merge_vector( + self, + profile: dict, + item_vector: dict, + weight: float, + excluded_genres: list[int] | None = None, + ): """Merges an item's sparse vector into the main profile with a weight.""" # Weights for specific dimensions (Feature Importance) @@ -228,6 +237,8 @@ def _merge_vector(self, profile: dict, item_vector: dict, weight: float): profile["years"][ids] += final_weight elif ids: for feature_id in ids: + if dim == "genres" and excluded_genres and feature_id in excluded_genres: + continue profile[dim][feature_id] += final_weight async def _fetch_full_metadata(self, tmdb_id: int, type_: str) -> dict | None: diff --git a/static/index.html b/static/index.html index fe9ab9a..1c4785c 100644 --- a/static/index.html +++ b/static/index.html @@ -208,6 +208,31 @@
+ +