Skip to content

Commit

Permalink
Sort questions by "hotness" (#195)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonhoo authored Dec 28, 2024
1 parent 9c7ca7b commit cca8820
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 35 deletions.
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,15 @@ This is why the API to look up event info and question texts/authors is
separated from looking up vote counts -- the former can have much longer
cache time.

To allow querying questions for a given event and receive them in sorted
order, `questions` also has a [global secondary index] called `top`
whose partition key is the event ULID and sort key `votes`. That index
also projects out the "answered" and "hidden" fields so that a single
query to that index gives all the mutable state for an event's question
list (and can thus be queried with a single DynamoDB call by the
Lambda).
To allow querying questions for a given event, `questions` also has a
[global secondary index] called `top` whose partition key is the event
ULID. We don't use a sort key, since we want to sort by a [more complex
function]. That index also projects out the "answered" and "hidden"
fields so that a single query to that index gives all the mutable state
for an event's question list (and can thus be queried with a single
DynamoDB call by the Lambda).

[more complex function]: https://www.evanmiller.org/ranking-news-items-with-upvotes.html

**Metrics and Logging.**

Expand Down
4 changes: 1 addition & 3 deletions client/src/List.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@
la.newQuestions = la.newQuestions.filter((qid) => !(qid in nowPresent));
for (const newQ of la.newQuestions) {
console.info("add in", newQ);
// new questions always go at the bottom
qs.push({
qid: newQ,
hidden: false,
Expand Down Expand Up @@ -188,9 +189,6 @@
}
}
}
qs.sort((a, b) => {
return b.votes - a.votes;
});
return qs;
}
Expand Down
2 changes: 1 addition & 1 deletion client/src/store.js
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ export async function questionData(qid, qs) {
// that's small-ish.
//
// ref https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchGetItem.html
qids.sort();
if (qids.length > 25) {
qids = qids.slice(0, 25);
}
qids.sort();
let arg = qids.join(",");
// and go!
// TODO: handle failure
Expand Down
8 changes: 1 addition & 7 deletions infra/dynamodb.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ resource "aws_dynamodb_table" "questions" {
type = "S"
}

attribute {
name = "votes"
type = "N"
}

ttl {
attribute_name = "expire"
enabled = true
Expand All @@ -42,8 +37,7 @@ resource "aws_dynamodb_table" "questions" {
global_secondary_index {
name = "top"
hash_key = "eid"
range_key = "votes"
projection_type = "INCLUDE"
non_key_attributes = ["answered", "hidden"]
non_key_attributes = ["answered", "hidden", "votes"]
}
}
11 changes: 5 additions & 6 deletions server/run-migrations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,24 @@ aws dynamodb create-table \
--attribute-definitions AttributeName=id,AttributeType=S \
--key-schema AttributeName=id,KeyType=HASH \
--billing-mode PAY_PER_REQUEST \
--endpoint-url ${ENDPOINT_URL} >/dev/null
--endpoint-url "${ENDPOINT_URL}" >/dev/null

aws dynamodb update-time-to-live \
--table-name events \
--time-to-live-specification Enabled=true,AttributeName=expire \
--endpoint-url ${ENDPOINT_URL} >/dev/null
--endpoint-url "${ENDPOINT_URL}" >/dev/null

echo "🗒️ Creating 'questions' table and 🚄 GSI..."
aws dynamodb create-table \
--table-name questions \
--attribute-definitions AttributeName=id,AttributeType=S \
AttributeName=eid,AttributeType=S \
AttributeName=votes,AttributeType=N \
--key-schema AttributeName=id,KeyType=HASH \
--global-secondary-indexes 'IndexName=top,KeySchema=[{AttributeName=eid,KeyType=HASH},{AttributeName=votes,KeyType=RANGE}],Projection={ProjectionType=INCLUDE,NonKeyAttributes=[answered,hidden]}' \
--global-secondary-indexes 'IndexName=top,KeySchema=[{AttributeName=eid,KeyType=HASH}],Projection={ProjectionType=INCLUDE,NonKeyAttributes=[answered,hidden,votes]}' \
--billing-mode PAY_PER_REQUEST \
--endpoint-url ${ENDPOINT_URL} >/dev/null
--endpoint-url "${ENDPOINT_URL}" >/dev/null

aws dynamodb update-time-to-live \
--table-name questions \
--time-to-live-specification Enabled=true,AttributeName=expire \
--endpoint-url ${ENDPOINT_URL} >/dev/null
--endpoint-url "${ENDPOINT_URL}" >/dev/null
59 changes: 48 additions & 11 deletions server/src/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ use http::{
header::{self, HeaderName},
StatusCode,
};
use std::collections::HashMap;
use std::{
collections::HashMap,
time::{Duration, SystemTime},
};
use ulid::Ulid;

#[allow(unused_imports)]
Expand Down Expand Up @@ -66,15 +69,6 @@ impl Backend {
let qs = questions_by_eid
.get_mut(eid)
.expect("list for non-existing event");
qs.sort_unstable_by_key(|qid| {
std::cmp::Reverse(
questions[qid]["votes"]
.as_n()
.expect("votes is always set")
.parse::<usize>()
.expect("votes are always numbers"),
)
});

Ok(QueryOutput::builder()
.set_count(Some(qs.len() as i32))
Expand Down Expand Up @@ -193,7 +187,50 @@ async fn list_inner(
match dynamo.list(&eid, has_secret).await {
Ok(qs) => {
trace!(%eid, n = %qs.count(), "listed questions");
let questions: Vec<_> = qs.items().iter().filter_map(serialize_question).collect();
let mut questions: Vec<_> = qs.items().iter().filter_map(serialize_question).collect();

// sort based on "hotness" of the question over time:
// https://www.evanmiller.org/ranking-news-items-with-upvotes.html
// the wrapper struct is needed because f64 doesn't impl Ord
#[derive(Debug)]
#[repr(transparent)]
struct Score(f64);
impl PartialEq for Score {
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
}
}
impl Eq for Score {}
impl PartialOrd for Score {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Score {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.0.total_cmp(&other.0)
}
}
let now = SystemTime::now();
let score = |q: &serde_json::Value| {
let dt_in_minutes_rounded_down = now
.duration_since(
q["qid"]
.as_str()
.expect("it's a ULID")
.parse::<Ulid>()
.expect("produced as ULID by us")
.datetime(),
)
.unwrap_or(Duration::ZERO)
.as_secs()
/ 60;
let dt = dt_in_minutes_rounded_down + 1;
let votes = q["votes"].as_u64().expect("votes is a number") as f64;
let exp = (-1. * dt as f64).exp();
Score(exp * votes / (1. - exp))
};
questions.sort_by_cached_key(|q| std::cmp::Reverse(score(q)));

let max_age = if has_secret {
// hosts should be allowed to see more up-to-date views
Expand Down

0 comments on commit cca8820

Please sign in to comment.