News Locality Calibration (CCRI-POPROX#103)
Apply some refactoring by creating a `Calibrator` base class that both
`TopicCalibrator` and the new `LocalityCalibrator` inherit from.
Updated the current `test_calibration` test suites.

Some TODOs we need to figure out for next step:

- [x] Add test for current logic
- [ ] Tune the calibration parameters (the current values are borrowed
from topic calibration)
- [ ] Run the code on an S3 instance and call the necessary endpoints from
POPROX
- [ ] Figure out how to connect the logic to participant selection (who
will be included in this experiment?)
- [ ] Integrate the LLM context generation into calibrated articles
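
As a quick illustration of the refactoring described above (a sketch only, not part of the diff; the `theta` and `num_slots` values are the defaults from this change, not tuned), both calibrators are constructed the same way and differ only in how they map an article to categories:

from poprox_recommender.components.diversifiers import LocalityCalibrator, TopicCalibrator

topic_calibrator = TopicCalibrator(theta=0.1, num_slots=10)        # categories = general topics
locality_calibrator = LocalityCalibrator(theta=0.1, num_slots=10)  # categories = localities

# Both reuse Calibrator.calibration() for the greedy re-ranking; each subclass
# overrides add_article_to_categories() to count its own category type.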
sophiasun0515 authored Oct 2, 2024
1 parent 114bc34 commit 2fd4b13
Showing 10 changed files with 258 additions and 86 deletions.
4 changes: 3 additions & 1 deletion src/poprox_recommender/components/diversifiers/__init__.py
@@ -1,5 +1,7 @@
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.components.diversifiers.locality_calibration import LocalityCalibrator
from poprox_recommender.components.diversifiers.mmr import MMRDiversifier
from poprox_recommender.components.diversifiers.pfar import PFARDiversifier
from poprox_recommender.components.diversifiers.topic_calibration import TopicCalibrator

__all__ = ["MMRDiversifier", "PFARDiversifier", "TopicCalibrator"]
__all__ = ["MMRDiversifier", "PFARDiversifier", "Calibrator", "TopicCalibrator", "LocalityCalibrator"]
77 changes: 77 additions & 0 deletions src/poprox_recommender/components/diversifiers/calibration.py
@@ -0,0 +1,77 @@
from collections import defaultdict

import numpy as np

from poprox_concepts import Article
from poprox_recommender.lkpipeline import Component
from poprox_recommender.topics import normalized_category_count


# General calibration uses MMR
# to rerank recommendations according to
# a calibration context (e.g. news topic or locality)
class Calibrator(Component):
def __init__(self, theta: float = 0.1, num_slots=10):
# The theta term controls the trade-off between relevance score and calibration:
# the higher the theta, the more calibrated the resulting recommendations.
self.theta = theta
self.num_slots = num_slots

def __call__(self):
pass

def add_article_to_categories(self, rec_categories_with_candidate, article):
pass

def normalized_categories_with_candidate(self, rec_categories, article):
rec_categories_with_candidate = rec_categories.copy()
self.add_article_to_categories(rec_categories_with_candidate, article)
return normalized_category_count(rec_categories_with_candidate)

def calibration(self, relevance_scores, articles, preferences, theta, topk) -> list[Article]:
# MR_i = (1 - \theta) * reward_i - \theta * C(S + i)  # C is calibration
# R is all candidates (not selected yet)

recommendations = [] # final recommendation (topk index)
rec_categories = defaultdict(int) # frequency distribution of categories of S

for _ in range(topk):
candidate = None # next item
best_candidate_score = float("-inf")

for article_idx, article_score in enumerate(relevance_scores): # iterate R for next item
if article_idx in recommendations:
continue

normalized_candidate_topics = self.normalized_categories_with_candidate(
rec_categories, articles[article_idx]
)
calibration = compute_kl_divergence(preferences, normalized_candidate_topics)

adjusted_candidate_score = (1 - theta) * article_score - (theta * calibration)
if adjusted_candidate_score > best_candidate_score:
best_candidate_score = adjusted_candidate_score
candidate = article_idx

if candidate is not None:
recommendations.append(candidate)
self.add_article_to_categories(rec_categories, articles[candidate])

return recommendations


# from https://github.com/CCRI-POPROX/poprox-recommender/blob/feature/experiment0/tests/test_calibration.ipynb
def compute_kl_divergence(interacted_distr, reco_distr, kl_div=0.0, alpha=0.01):
"""
KL (p || q), the lower the better.
alpha is not really a tuning parameter, it's just there to make the
computation more numerically stable.
"""
for category, score in interacted_distr.items():
reco_score = reco_distr.get(category, 0.0)
reco_score = (1 - alpha) * reco_score + alpha * score
if reco_score != 0.0:
kl_div += score * np.log2(score / reco_score)

return kl_div
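
For intuition (an illustrative check, not part of the diff; the toy distributions are hypothetical and compute_kl_divergence is the function defined just above), the smoothed KL divergence is 0 when the recommendation distribution matches the preference distribution and grows as preferred categories go missing:

prefs = {"US": 0.5, "World": 0.5}  # hypothetical normalized preference distribution
recs = {"US": 1.0}                 # recommendations so far cover only "US"

compute_kl_divergence(prefs, prefs)  # 0.0 -- perfectly calibrated
compute_kl_divergence(prefs, recs)   # ~2.8 -- heavy penalty for the missing "World" mass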
37 changes: 37 additions & 0 deletions src/poprox_recommender/components/diversifiers/locality_calibration.py
@@ -0,0 +1,37 @@
import torch as th

from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.topics import extract_locality, normalized_category_count


# Locality Calibration uses MMR
# to rerank recommendations according to
# locality calibration
class LocalityCalibrator(Calibrator):
def __init__(self, theta: float = 0.1, num_slots=10):
super().__init__(theta, num_slots)

def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestProfile) -> ArticleSet:
normalized_locality_prefs = normalized_category_count(interest_profile.click_locality_counts)

if candidate_articles.scores is not None:
article_scores = th.sigmoid(th.tensor(candidate_articles.scores))
else:
article_scores = th.zeros(len(candidate_articles.articles))

article_scores = article_scores.cpu().detach().numpy()

article_indices = self.calibration(
article_scores,
candidate_articles.articles,
normalized_locality_prefs,
self.theta,
topk=self.num_slots,
)
return ArticleSet(articles=[candidate_articles.articles[int(idx)] for idx in article_indices])

def add_article_to_categories(self, rec_categories, article):
locality_list = extract_locality(article)
for locality in locality_list:
rec_categories[locality] = rec_categories.get(locality, 0) + 1
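
For intuition (illustrative only; `selected_articles`, the locality values, and the `locality_calibrator` instance below are hypothetical), the overridden add_article_to_categories accumulates locality counts for the already-selected slate, and normalized_category_count turns those counts into the distribution compared against the user's preferences:

rec_categories = {}
for article in selected_articles:  # hypothetical list of articles already in the slate
    locality_calibrator.add_article_to_categories(rec_categories, article)
# e.g. rec_categories == {"US": 3, "World": 1}
# normalized_category_count(rec_categories) == {"US": 0.75, "World": 0.25}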
4 changes: 2 additions & 2 deletions src/poprox_recommender/components/diversifiers/pfar.py
@@ -5,7 +5,7 @@
from poprox_concepts import Article, ArticleSet, InterestProfile
from poprox_recommender.lkpipeline import Component
from poprox_recommender.pytorch.decorators import torch_inference
from poprox_recommender.topics import GENERAL_TOPICS, extract_general_topics, normalized_topic_count
from poprox_recommender.topics import GENERAL_TOPICS, extract_general_topics, normalized_category_count


class PFARDiversifier(Component):
@@ -30,7 +30,7 @@ def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestPro
for topic, click_count in interest_profile.click_topic_counts.items():
topic_preferences[topic] = click_count

normalized_topic_prefs = normalized_topic_count(topic_preferences)
normalized_topic_prefs = normalized_category_count(topic_preferences)

article_indices = pfar_diversification(
article_scores,
83 changes: 10 additions & 73 deletions src/poprox_recommender/components/diversifiers/topic_calibration.py
@@ -1,23 +1,16 @@
from collections import defaultdict

import numpy as np
import torch as th

from poprox_concepts import Article, ArticleSet, InterestProfile
from poprox_recommender.lkpipeline import Component
from poprox_recommender.topics import extract_general_topics, normalized_topic_count
from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers.calibration import Calibrator
from poprox_recommender.topics import extract_general_topics, normalized_category_count


# Topic Calibration uses MMR
# to rerank recommendations according to
# topic calibration
class TopicCalibrator(Component):
def __init__(self, theta: float = 0.1, num_slots=10):
# Theta term controls the score and calibration tradeoff, the higher
# the theta the higher the resulting recommendation will be calibrated.
self.theta = theta
self.num_slots = num_slots

class TopicCalibrator(Calibrator):
def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestProfile) -> ArticleSet:
normalized_topic_prefs = self.compute_topic_dist(interest_profile)

@@ -28,7 +21,7 @@ def __call__(self, candidate_articles: ArticleSet, interest_profile: InterestPro

article_scores = article_scores.cpu().detach().numpy()

article_indices = topic_calibration(
article_indices = self.calibration(
article_scores,
candidate_articles.articles,
normalized_topic_prefs,
@@ -48,66 +41,10 @@ def compute_topic_dist(self, interest_profile):
for topic, click_count in interest_profile.click_topic_counts.items():
topic_preferences[topic] += click_count

normalized_topic_prefs = normalized_topic_count(topic_preferences)
normalized_topic_prefs = normalized_category_count(topic_preferences)
return normalized_topic_prefs


def topic_calibration(relevance_scores, articles, topic_preferences, theta, topk) -> list[Article]:
# MR_i = \theta * reward_i - (1 - \theta)*C(S + i) # C is calibration
# R is all candidates (not selected yet)

recommendations = [] # final recommendation (topk index)
rec_topics = defaultdict(int) # frequency distribution of topics of S

for k in range(topk):
candidate = None # next item
best_candidate_score = float("-inf")

for article_idx, article_score in enumerate(relevance_scores): # iterate R for next item
if article_idx in recommendations:
continue

normalized_candidate_topics = normalized_topics_with_candidate(rec_topics, articles[article_idx])
calibration = compute_kl_divergence(topic_preferences, normalized_candidate_topics)

adjusted_candidate_score = (1 - theta) * article_score - (theta * calibration)
if adjusted_candidate_score > best_candidate_score:
best_candidate_score = adjusted_candidate_score
candidate = article_idx

if candidate is not None:
recommendations.append(candidate)
add_article_to_topics(rec_topics, articles[candidate])

return recommendations


def add_article_to_topics(rec_topics, article):
topics = extract_general_topics(article)
for topic in topics:
rec_topics[topic] = rec_topics.get(topic, 0) + 1


def normalized_topics_with_candidate(rec_topics, article):
rec_topics_with_candidate = rec_topics.copy()
add_article_to_topics(rec_topics_with_candidate, article)
return normalized_topic_count(rec_topics_with_candidate)


# from https://github.com/CCRI-POPROX/poprox-recommender/blob/feature/experiment0/tests/test_calibration.ipynb
def compute_kl_divergence(interacted_distr, reco_distr):
"""
KL (p || q), the lower the better.
alpha is not really a tuning parameter, it's just there to make the
computation more numerically stable.
"""
kl_div = 0.0
alpha = 0.01
for genre, score in interacted_distr.items():
reco_score = reco_distr.get(genre, 0.0)
reco_score = (1 - alpha) * reco_score + alpha * score
if reco_score != 0.0:
kl_div += score * np.log2(score / reco_score)

return kl_div
def add_article_to_categories(self, rec_topics, article):
topics = extract_general_topics(article)
for topic in topics:
rec_topics[topic] = rec_topics.get(topic, 0) + 1
3 changes: 2 additions & 1 deletion src/poprox_recommender/handler.py
@@ -4,7 +4,7 @@
from poprox_concepts import ArticleSet
from poprox_concepts.api.recommendations import RecommendationRequest, RecommendationResponse
from poprox_recommender.recommenders import select_articles
from poprox_recommender.topics import user_topic_preference
from poprox_recommender.topics import user_locality_preference, user_topic_preference

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -49,6 +49,7 @@ def generate_recs(event, context):
clicked_articles = ArticleSet(articles=clicked_articles)

profile.click_topic_counts = user_topic_preference(req.past_articles, profile.click_history)
profile.click_locality_counts = user_locality_preference(req.past_articles, profile.click_history)

outputs = select_articles(
candidate_articles,
27 changes: 21 additions & 6 deletions src/poprox_recommender/recommenders.py
@@ -3,7 +3,12 @@
from typing import Any

from poprox_concepts import ArticleSet, InterestProfile
from poprox_recommender.components.diversifiers import MMRDiversifier, PFARDiversifier, TopicCalibrator
from poprox_recommender.components.diversifiers import (
LocalityCalibrator,
MMRDiversifier,
PFARDiversifier,
TopicCalibrator,
)
from poprox_recommender.components.embedders import NRMSArticleEmbedder, NRMSUserEmbedder
from poprox_recommender.components.filters import TopicFilter
from poprox_recommender.components.joiners import Fill
@@ -85,7 +90,8 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
topk_ranker = TopkRanker(num_slots=num_slots)
mmr = MMRDiversifier(num_slots=num_slots)
pfar = PFARDiversifier(num_slots=num_slots)
calibrator = TopicCalibrator(num_slots=num_slots)
locality_calibrator = LocalityCalibrator(num_slots=num_slots)
topic_calibrator = TopicCalibrator(num_slots=num_slots)
sampler = SoftmaxSampler(num_slots=num_slots, temperature=30.0)

nrms_pipe = build_pipeline(
@@ -112,11 +118,19 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
num_slots=num_slots,
)

cali_pipe = build_pipeline(
"NRMS+Calibration",
topic_cali_pipe = build_pipeline(
"NRMS+Topic+Calibration",
article_embedder=article_embedder,
user_embedder=user_embedder,
ranker=calibrator,
ranker=topic_calibrator,
num_slots=num_slots,
)

locality_cali_pipe = build_pipeline(
"NRMS+Locality+Calibration",
article_embedder=article_embedder,
user_embedder=user_embedder,
ranker=locality_calibrator,
num_slots=num_slots,
)

@@ -132,7 +146,8 @@ def build_pipelines(num_slots: int, device: str) -> dict[str, Pipeline]:
"nrms": nrms_pipe,
"mmr": mmr_pipe,
"pfar": pfar_pipe,
"topic-cali": cali_pipe,
"topic-cali": topic_cali_pipe,
"locality-cali": locality_cali_pipe,
"softmax": softmax_pipe,
}
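
For context (a sketch under the assumption that callers look pipelines up by the keys above; the exact request plumbing is outside this diff), the new recommender is reached through its dictionary key:

pipelines = build_pipelines(num_slots=10, device="cpu")
locality_pipe = pipelines["locality-cali"]  # the NRMS+Locality+Calibration pipeline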
