Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

News Locality Calibration #103

Merged
merged 5 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/poprox_recommender/components/diversifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.components.diversifiers.locality_calibration import LocalityCalibrator
from poprox_recommender.components.diversifiers.mmr import MMRDiversifier
from poprox_recommender.components.diversifiers.pfar import PFARDiversifier
from poprox_recommender.components.diversifiers.topic_calibration import TopicCalibrator

__all__ = ["MMRDiversifier", "PFARDiversifier", "TopicCalibrator"]
__all__ = ["MMRDiversifier", "PFARDiversifier", "Calibrator", "TopicCalibrator", "LocalityCalibrator"]
77 changes: 77 additions & 0 deletions src/poprox_recommender/components/diversifiers/calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from collections import defaultdict

import numpy as np

from poprox_concepts import Article
from poprox_recommender.lkpipeline import Component
from poprox_recommender.topics import normalized_category_count


# General calibration greedily re-ranks recommendations (MMR-style)
# according to a certain calibration context (e.g. news topic, locality).
class Calibrator(Component):
    """Base class for greedy calibration re-rankers.

    Subclasses provide ``__call__`` (the component entry point) and
    ``add_article_to_categories`` (how one article contributes to the
    category counts that are being calibrated). The shared greedy selection
    loop lives in ``calibration``.
    """

    def __init__(self, theta: float = 0.1, num_slots=10):
        # Theta term controls the score and calibration tradeoff, the higher
        # the theta the higher the resulting recommendation will be calibrated.
        self.theta = theta
        self.num_slots = num_slots

    def __call__(self, candidate_articles, interest_profile):
        """Re-rank ``candidate_articles``; must be overridden by subclasses.

        The original declaration took no ``self``, so invoking an instance
        failed with an unrelated ``TypeError``. Raising ``NotImplementedError``
        makes the missing override explicit.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement __call__")

    def add_article_to_categories(self, rec_categories_with_candidate, article):
        """Hook: add ``article``'s categories to the given count mapping in place.

        The base implementation intentionally does nothing; subclasses
        override it with their own category extraction.
        """

    def normalized_categories_with_candidate(self, rec_categories, article):
        """Return normalized category counts as if ``article`` were selected.

        The counts are copied first, so ``rec_categories`` is not modified.
        """
        rec_categories_with_candidate = rec_categories.copy()
        self.add_article_to_categories(rec_categories_with_candidate, article)
        return normalized_category_count(rec_categories_with_candidate)

    def calibration(self, relevance_scores, articles: list[Article], preferences, theta, topk) -> list[int]:
        """Greedily pick up to ``topk`` article indices.

        Each round scores every not-yet-selected article as

            (1 - theta) * relevance - theta * KL(preferences || categories(S + i))

        and appends the best one to the selection ``S``.

        Args:
            relevance_scores: iterable of per-article relevance scores, aligned
                by position with ``articles``.
            articles: the candidate articles.
            preferences: mapping of category -> preferred share, passed to
                ``compute_kl_divergence`` as the target distribution.
            theta: weight of the calibration penalty versus relevance.
            topk: maximum number of articles to select.

        Returns:
            Indices into ``articles`` in selection order (not the articles
            themselves); may be shorter than ``topk``.
        """
        # MR_i = (1 - \theta) * reward_i - \theta * C(S + i)  # C is calibration
        # R is all candidates (not selected yet)

        recommendations = []  # final recommendation (topk index), in pick order
        selected = set()  # same indices, for O(1) "already picked" checks
        rec_categories = defaultdict(int)  # frequency distribution of categories of S

        for _ in range(topk):
            candidate = None  # next item
            best_candidate_score = float("-inf")

            for article_idx, article_score in enumerate(relevance_scores):  # iterate R for next item
                if article_idx in selected:
                    continue

                normalized_candidate_topics = self.normalized_categories_with_candidate(
                    rec_categories, articles[article_idx]
                )
                calibration = compute_kl_divergence(preferences, normalized_candidate_topics)

                adjusted_candidate_score = (1 - theta) * article_score - (theta * calibration)
                if adjusted_candidate_score > best_candidate_score:
                    best_candidate_score = adjusted_candidate_score
                    candidate = article_idx

            if candidate is None:
                # Nothing selectable is left; later rounds would find nothing either.
                break

            recommendations.append(candidate)
            selected.add(candidate)
            self.add_article_to_categories(rec_categories, articles[candidate])

        return recommendations


# from https://github.com/CCRI-POPROX/poprox-recommender/blob/feature/experiment0/tests/test_calibration.ipynb
def compute_kl_divergence(interacted_distr, reco_distr, kl_div=0.0, alpha=0.01):
    """
    KL (p || q), the lower the better.

    ``interacted_distr`` is p and ``reco_distr`` is q, both mappings of
    category -> share. Categories missing from ``reco_distr`` count as 0.0.
    ``kl_div`` is the starting value of the accumulated sum.

    alpha is not really a tuning parameter, it's just there to make the
    computation more numerically stable.

    Categories whose share in ``interacted_distr`` is not positive add
    nothing (the usual ``0 * log 0 = 0`` convention). Without this guard a
    zero share paired with a non-zero recommendation share evaluated
    ``0 * log2(0)``, which is NaN and poisons every later score comparison.
    """
    for category, score in interacted_distr.items():
        if score <= 0.0:
            continue

        reco_score = reco_distr.get(category, 0.0)
        # Blend a little of p into q so that q is never zero where p is positive.
        reco_score = (1 - alpha) * reco_score + alpha * score
        if reco_score != 0.0:
            kl_div += score * np.log2(score / reco_score)

    return kl_div
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import torch as th

from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.topics import extract_locality, normalized_category_count


# Locality calibration: the shared greedy (MMR-style) re-ranking from
# Calibrator, with article locality as the calibration context.
class LocalityCalibrator(Calibrator):
    """Re-rank candidate articles toward the profile's clicked-locality mix."""

    def __init__(self, theta: float = 0.1, num_slots=10):
        super().__init__(theta, num_slots)

    def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestProfile) -> ArticleSet:
        """Select up to ``num_slots`` articles using locality calibration.

        The target distribution is the normalized
        ``interest_profile.click_locality_counts``. Relevance is the sigmoid
        of ``candidate_articles.scores``, or all zeros when no scores exist.
        """
        locality_prefs = normalized_category_count(interest_profile.click_locality_counts)

        if candidate_articles.scores is None:
            score_tensor = th.zeros(len(candidate_articles.articles))
        else:
            score_tensor = th.sigmoid(th.tensor(candidate_articles.scores))

        # The greedy loop works on a plain numpy array of scores.
        score_array = score_tensor.cpu().detach().numpy()

        picked = self.calibration(
            score_array,
            candidate_articles.articles,
            locality_prefs,
            self.theta,
            topk=self.num_slots,
        )

        reranked = []
        for position in picked:
            reranked.append(candidate_articles.articles[int(position)])
        return ArticleSet(articles=reranked)

    def add_article_to_categories(self, rec_categories, article):
        """Add one to the count of every locality extracted from ``article``."""
        for place in extract_locality(article):
            rec_categories[place] = rec_categories.get(place, 0) + 1
4 changes: 2 additions & 2 deletions src/poprox_recommender/components/diversifiers/pfar.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from poprox_concepts import Article, ArticleSet, InterestProfile
from poprox_recommender.lkpipeline import Component
from poprox_recommender.pytorch.decorators import torch_inference
from poprox_recommender.topics import GENERAL_TOPICS, extract_general_topics, normalized_topic_count
from poprox_recommender.topics import GENERAL_TOPICS, extract_general_topics, normalized_category_count


class PFARDiversifier(Component):
Expand All @@ -30,7 +30,7 @@ def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestPro
for topic, click_count in interest_profile.click_topic_counts.items():
topic_preferences[topic] = click_count

normalized_topic_prefs = normalized_topic_count(topic_preferences)
normalized_topic_prefs = normalized_category_count(topic_preferences)

article_indices = pfar_diversification(
article_scores,
Expand Down
83 changes: 10 additions & 73 deletions src/poprox_recommender/components/diversifiers/topic_calibration.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
from collections import defaultdict

import numpy as np
import torch as th

from poprox_concepts import Article, ArticleSet, InterestProfile
from poprox_recommender.lkpipeline import Component
from poprox_recommender.topics import extract_general_topics, normalized_topic_count
from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.topics import extract_general_topics, normalized_category_count


# Topic Calibration uses MMR
# to rerank recommendations according to
# topic calibration
class TopicCalibrator(Component):
def __init__(self, theta: float = 0.1, num_slots=10):
# Theta term controls the score and calibration tradeoff, the higher
# the theta the higher the resulting recommendation will be calibrated.
self.theta = theta
self.num_slots = num_slots

class TopicCalibrator(Calibrator):
def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestProfile) -> ArticleSet:
normalized_topic_prefs = self.compute_topic_dist(interest_profile)

Expand All @@ -28,7 +21,7 @@ def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestPro

article_scores = article_scores.cpu().detach().numpy()

article_indices = topic_calibration(
article_indices = self.calibration(
article_scores,
candidate_articles.articles,
normalized_topic_prefs,
Expand All @@ -48,66 +41,10 @@ def compute_topic_dist(self, interest_profile):
for topic, click_count in interest_profile.click_topic_counts.items():
topic_preferences[topic] += click_count

normalized_topic_prefs = normalized_topic_count(topic_preferences)
normalized_topic_prefs = normalized_category_count(topic_preferences)
return normalized_topic_prefs


def topic_calibration(relevance_scores, articles: list[Article], topic_preferences, theta, topk) -> list[int]:
    """Greedily pick up to ``topk`` article indices, trading relevance for topic calibration.

    Each round scores every not-yet-selected article as

        (1 - theta) * relevance - theta * KL(topic_preferences || topics(S + i))

    and appends the best one to the selection ``S``.

    Returns:
        Indices into ``articles`` in selection order (not the articles
        themselves); may be shorter than ``topk``.
    """
    # MR_i = (1 - \theta) * reward_i - \theta * C(S + i)  # C is calibration
    # R is all candidates (not selected yet)

    recommendations = []  # final recommendation (topk index), in pick order
    selected = set()  # same indices, for O(1) "already picked" checks
    rec_topics = defaultdict(int)  # frequency distribution of topics of S

    for _ in range(topk):
        candidate = None  # next item
        best_candidate_score = float("-inf")

        for article_idx, article_score in enumerate(relevance_scores):  # iterate R for next item
            if article_idx in selected:
                continue

            normalized_candidate_topics = normalized_topics_with_candidate(rec_topics, articles[article_idx])
            calibration = compute_kl_divergence(topic_preferences, normalized_candidate_topics)

            adjusted_candidate_score = (1 - theta) * article_score - (theta * calibration)
            if adjusted_candidate_score > best_candidate_score:
                best_candidate_score = adjusted_candidate_score
                candidate = article_idx

        if candidate is None:
            # Nothing selectable is left; later rounds would find nothing either.
            break

        recommendations.append(candidate)
        selected.add(candidate)
        add_article_to_topics(rec_topics, articles[candidate])

    return recommendations


def add_article_to_topics(rec_topics, article):
    """Add one to ``rec_topics`` for each topic ``extract_general_topics`` returns for ``article``."""
    for topic_name in extract_general_topics(article):
        rec_topics[topic_name] = rec_topics.get(topic_name, 0) + 1


def normalized_topics_with_candidate(rec_topics, article):
    """Return normalized topic counts as if ``article`` had been added.

    The counts are copied before the article is added, so ``rec_topics``
    itself is not modified.
    """
    hypothetical_topics = rec_topics.copy()
    add_article_to_topics(hypothetical_topics, article)
    normalized = normalized_topic_count(hypothetical_topics)
    return normalized


# from https://github.com/CCRI-POPROX/poprox-recommender/blob/feature/experiment0/tests/test_calibration.ipynb
def compute_kl_divergence(interacted_distr, reco_distr):
    """
    KL (p || q), the lower the better.

    ``interacted_distr`` is p and ``reco_distr`` is q, both mappings of
    genre -> share. Genres missing from ``reco_distr`` count as 0.0.

    alpha is not really a tuning parameter, it's just there to make the
    computation more numerically stable.

    Genres whose share in ``interacted_distr`` is not positive add nothing
    (the usual ``0 * log 0 = 0`` convention). Without this guard a zero
    share paired with a non-zero recommendation share evaluated
    ``0 * log2(0)``, which is NaN.
    """
    kl_div = 0.0
    alpha = 0.01
    for genre, score in interacted_distr.items():
        if score <= 0.0:
            continue

        reco_score = reco_distr.get(genre, 0.0)
        # Blend a little of p into q so that q is never zero where p is positive.
        reco_score = (1 - alpha) * reco_score + alpha * score
        if reco_score != 0.0:
            kl_div += score * np.log2(score / reco_score)

    return kl_div
def add_article_to_categories(self, rec_topics, article):
    # Add one to the count of every topic that extract_general_topics returns.
    for topic_name in extract_general_topics(article):
        rec_topics[topic_name] = rec_topics.get(topic_name, 0) + 1
3 changes: 2 additions & 1 deletion src/poprox_recommender/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from poprox_concepts import ArticleSet
from poprox_concepts.api.recommendations import RecommendationRequest, RecommendationResponse
from poprox_recommender.recommenders import select_articles
from poprox_recommender.topics import user_topic_preference
from poprox_recommender.topics import user_locality_preference, user_topic_preference

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -49,6 +49,7 @@ def generate_recs(event, context):
clicked_articles = ArticleSet(articles=clicked_articles)

profile.click_topic_counts = user_topic_preference(req.past_articles, profile.click_history)
profile.click_locality_counts = user_locality_preference(req.past_articles, profile.click_history)

outputs = select_articles(
candidate_articles,
Expand Down
27 changes: 21 additions & 6 deletions src/poprox_recommender/recommenders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
from typing import Any

from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers import MMRDiversifier, PFARDiversifier, TopicCalibrator
from poprox_recommender.components.diversifiers import (
LocalityCalibrator,
MMRDiversifier,
PFARDiversifier,
TopicCalibrator,
)
from poprox_recommender.components.embedders import NRMSArticleEmbedder, NRMSUserEmbedder
from poprox_recommender.components.filters import TopicFilter
from poprox_recommender.components.joiners import Fill
Expand Down Expand Up @@ -85,7 +90,8 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
topk_ranker = TopkRanker(num_slots=num_slots)
mmr = MMRDiversifier(num_slots=num_slots)
pfar = PFARDiversifier(num_slots=num_slots)
calibrator = TopicCalibrator(num_slots=num_slots)
locality_calibrator = LocalityCalibrator(num_slots=num_slots)
topic_calibrator = TopicCalibrator(num_slots=num_slots)
sampler = SoftmaxSampler(num_slots=num_slots, temperature=30.0)

nrms_pipe = build_pipeline(
Expand All @@ -112,11 +118,19 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
num_slots=num_slots,
)

cali_pipe = build_pipeline(
"NRMS+Calibration",
topic_cali_pipe = build_pipeline(
"NRMS+Topic+Calibration",
article_embedder=article_embedder,
user_embedder=user_embedder,
ranker=calibrator,
ranker=topic_calibrator,
num_slots=num_slots,
)

locality_cali_pipe = build_pipeline(
"NRMS+Locality+Calibration",
article_embedder=article_embedder,
user_embedder=user_embedder,
ranker=locality_calibrator,
num_slots=num_slots,
)

Expand All @@ -132,7 +146,8 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
"nrms": nrms_pipe,
"mmr": mmr_pipe,
"pfar": pfar_pipe,
"topic-cali": cali_pipe,
"topic-cali": topic_cali_pipe,
"locality-cali": locality_cali_pipe,
"softmax": softmax_pipe,
}

Expand Down
Loading
Loading