From b0c2d4878a7a712bf0e0c443f2ba230429eb9899 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 1 Jun 2018 19:19:44 +0200 Subject: [PATCH] Use different algorithm for trending tags (#7697) --- app/models/trending_tags.rb | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/app/models/trending_tags.rb b/app/models/trending_tags.rb index eedd92644cc2c2..287de2a8a0bc9b 100644 --- a/app/models/trending_tags.rb +++ b/app/models/trending_tags.rb @@ -2,17 +2,16 @@ class TrendingTags KEY = 'trending_tags' - HALF_LIFE = 1.day.to_i - MAX_ITEMS = 500 EXPIRE_HISTORY_AFTER = 7.days.seconds + THRESHOLD = 5 class << self def record_use!(tag, account, at_time = Time.now.utc) - return if disallowed_hashtags.include?(tag.name) || account.silenced? + return if disallowed_hashtags.include?(tag.name) || account.silenced? || account.bot? - increment_vote!(tag.id, at_time) increment_historical_use!(tag.id, at_time) increment_unique_use!(tag.id, account.id, at_time) + increment_vote!(tag.id, at_time) end def get(limit) @@ -24,8 +23,16 @@ def get(limit) private def increment_vote!(tag_id, at_time) - redis.zincrby(KEY, (2**((at_time.to_i - epoch) / HALF_LIFE)).to_f, tag_id.to_s) - redis.zremrangebyrank(KEY, 0, -MAX_ITEMS) if rand < (2.to_f / MAX_ITEMS) + expected = redis.pfcount("activity:tags:#{tag_id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f + expected = 1.0 if expected.zero? + observed = redis.pfcount("activity:tags:#{tag_id}:#{at_time.beginning_of_day.to_i}:accounts").to_f + + if expected > observed || observed < THRESHOLD + redis.zrem(KEY, tag_id.to_s) + else + score = ((observed - expected)**2) / expected + redis.zadd(KEY, score, tag_id.to_s) + end end def increment_historical_use!(tag_id, at_time) @@ -40,12 +47,6 @@ def increment_unique_use!(tag_id, account_id, at_time) redis.expire(key, EXPIRE_HISTORY_AFTER) end - # The epoch needs to be 2.5 years in the future if the half-life is one day - # While dynamic, it will always be the same within one year - def epoch - @epoch ||= Date.new(Date.current.year + 2.5, 10, 1).to_datetime.to_i - end - def disallowed_hashtags return @disallowed_hashtags if defined?(@disallowed_hashtags)