Skip to content

Commit

Permalink
Closes internetarchive#5080. Adds basic ability to search/filter one'…
Browse files Browse the repository at this point in the history
…s reading log.
  • Loading branch information
scottbarnes committed Oct 13, 2022
1 parent 50541cb commit 6be383e
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 185 deletions.
165 changes: 143 additions & 22 deletions openlibrary/core/bookshelves.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import logging
import web
from datetime import date
from typing import Literal, Optional, cast
from typing import Literal, Optional, cast, Any, Final
from collections.abc import Iterable
from openlibrary.plugins.worksearch.search import get_solr

from openlibrary.utils.dateutil import DATE_ONE_MONTH_AGO, DATE_ONE_WEEK_AGO

from . import db

logger = logging.getLogger(__name__)

FILTER_BOOK_LIMIT: Final = 30_000


class Bookshelves(db.CommonExtras):

Expand Down Expand Up @@ -146,7 +150,7 @@ def count_total_books_logged_by_user(
@classmethod
def count_total_books_logged_by_user_per_shelf(
cls, username: str, bookshelf_ids: list[str] = None
) -> dict[str, int]:
) -> dict[int, int]:
"""Returns a dict mapping the specified user's bookshelves_ids to the
number of number of books logged per each shelf, i.e. {bookshelf_id:
count}. By default, we limit bookshelf_ids to those in PRESET_BOOKSHELVES
Expand All @@ -173,19 +177,64 @@ def count_total_books_logged_by_user_per_shelf(
def get_users_logged_books(
cls,
username: str,
bookshelf_id: str = None,
bookshelf_id: int = 0,
limit: int = 100,
page: int = 1, # Not zero-based counting!
sort: Literal['created asc', 'created desc'] = 'created desc',
) -> list[dict]:
"""Returns a list of Reading Log database records for books which
the user has logged. Records are described in core/schema.py
and include:
q: str = "",
) -> Any: # Circular imports prevent type hinting LoggedBooksData
"""
Returns LoggedBooksData containing Reading Log database records for books that
the user has logged. Also allows filtering/searching the reading log shelves,
and sorting reading log shelves (when not filtering).
The returned records ultimately come from Solr so that, as much as possible,
these query results may be used by anything relying on logged book data.
:param username: who logged this book
:param bookshelf_id: the ID of the bookshelf, see: PRESET_BOOKSHELVES.
If bookshelf_id is None, return books from all bookshelves.
:param q: an optional query string to filter the results.
"""
from openlibrary.core.models import LoggedBooksData
from openlibrary.plugins.worksearch.code import (
run_solr_query,
DEFAULT_SEARCH_FIELDS,
)

def add_storage_items_for_redirects(
reading_log_work_keys: list[str], solr_docs: list[web.Storage]
) -> list[web.storage]:
"""
Use reading_log_work_keys to fill in missing redirected items in the
the solr_docs query results.
Solr won't return matches for work keys that have been redirected. Because
we use Solr to build the lists of storage items that ultimately gets passed
to the templates, redirected items returned from the reading log DB will
'disappear' when not returned by Solr. This remedies that by filling in
dummy works, albeit with the correct work_id.
"""
for idx, work_key in enumerate(reading_log_work_keys):
corresponding_solr_doc = next(
(doc for doc in solr_docs if doc.key == work_key), None
)

if not corresponding_solr_doc:
solr_docs.insert(
idx,
web.storage(
{
"key": work_key,
"author_name": ["Unknown author"],
"title": "",
}
),
)

return solr_docs

shelf_totals = cls.count_total_books_logged_by_user_per_shelf(username)
oldb = db.get_db()
page = int(page or 1)
data = {
Expand All @@ -194,26 +243,98 @@ def get_users_logged_books(
'offset': limit * (page - 1),
'bookshelf_id': bookshelf_id,
}
if sort == 'created desc':

# Filtering won't occur unless q >= 3, as limited in mybooks.my_books_view().
if q:
# Filtering by query needs a larger limit as we need (ideally) all of a
# user's added works from the reading log DB. The logged work IDs are used
# to query Solr, which searches for matches related to those work IDs.
data["limit"] = FILTER_BOOK_LIMIT

query = (
"SELECT * from bookshelves_books WHERE "
"SELECT work_id, created, edition_id from bookshelves_books WHERE "
"bookshelf_id=$bookshelf_id AND username=$username "
"ORDER BY created DESC "
"LIMIT $limit OFFSET $offset"
"LIMIT $limit"
)

reading_log_books = list(oldb.query(query, vars=data))
assert len(reading_log_books) <= FILTER_BOOK_LIMIT

# Wrap in quotes to avoid treating as regex. Only need this for fq
solr_keys = ('"/works/OL%sW"' % i['work_id'] for i in reading_log_books)
solr_resp = run_solr_query(
param={'q': q},
offset=data["offset"],
rows=limit,
facet=False,
# Putting these in fq allows them to avoid user-query processing, which
# can be (surprisingly) slow if we have ~20k OR clauses.
extra_params=[('fq', f'key:({" OR ".join(solr_keys)})')],
)

# Downstream many things expect a list of web.storage docs.
solr_docs = [web.storage(doc) for doc in solr_resp.docs]
total_results = solr_resp.num_found

else:
query = (
"SELECT * from bookshelves_books WHERE "
"bookshelf_id=$bookshelf_id AND username=$username "
"ORDER BY created ASC "
"LIMIT $limit OFFSET $offset"
if sort == 'created desc':
query = (
"SELECT work_id, created, edition_id from bookshelves_books WHERE "
"bookshelf_id=$bookshelf_id AND username=$username "
"ORDER BY created DESC "
"LIMIT $limit OFFSET $offset"
)
else:
query = (
"SELECT work_id, created, edition_id from bookshelves_books WHERE "
"bookshelf_id=$bookshelf_id AND username=$username "
"ORDER BY created ASC "
"LIMIT $limit OFFSET $offset"
)
if not bookshelf_id:
query = "SELECT * from bookshelves_books WHERE username=$username"
# XXX Removing limit, offset, etc from data looks like a bug
# unrelated / not fixing in this PR.
data = {'username': username}

reading_log_books: list[web.storage] = list( # type: ignore[no-redef]
oldb.query(query, vars=data)
)
if not bookshelf_id:
query = "SELECT * from bookshelves_books WHERE username=$username"
# XXX Removing limit, offset, etc from data looks like a bug
# unrelated / not fixing in this PR.
data = {'username': username}
return list(oldb.query(query, vars=data))
reading_log_work_keys = [ # type: ignore[assignment]
'/works/OL%sW' % i['work_id'] for i in reading_log_books
]
solr_docs = get_solr().get_many(
reading_log_work_keys,
fields=DEFAULT_SEARCH_FIELDS | {'subject', 'person', 'place', 'time'},
)
solr_docs = add_storage_items_for_redirects(
reading_log_work_keys, solr_docs
)
assert len(solr_docs) == len(
reading_log_work_keys
), "solr_docs is missing an item/items from reading_log_work_keys; see add_storage_items_for_redirects()" # noqa E501

total_results = shelf_totals.get(bookshelf_id, 0)

# Insert {logged_edition} if present and {logged_date} into the Solr work.
# These dates are not used for sort-by-added-date. The DB handles that.
# Currently only used in JSON requests.
for i in range(len(solr_docs)):
solr_docs[i].logged_date = reading_log_books[i]['created']
solr_docs[i].logged_edition = (
'/books/OL%sM' % reading_log_books[i]['edition_id']
if reading_log_books[i]['edition_id']
else ''
)

return LoggedBooksData(
username=username,
q=q,
page_size=limit,
total_results=total_results,
shelf_totals=shelf_totals,
docs=solr_docs,
)

@classmethod
def iterate_users_logged_books(cls, username: str) -> Iterable[dict]:
Expand Down
43 changes: 42 additions & 1 deletion openlibrary/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import requests
from typing import Any
from collections import defaultdict
from dataclasses import dataclass, field

from infogami.infobase import client

Expand Down Expand Up @@ -445,7 +446,7 @@ def edition_count(self):
def get_lists(self, limit=50, offset=0, sort=True):
return self._get_lists(limit=limit, offset=offset, sort=sort)

def get_users_rating(self, username):
def get_users_rating(self, username: str) -> int | None:
if not username:
return None
work_id = extract_numeric_id_from_olid(self.key)
Expand Down Expand Up @@ -1120,6 +1121,46 @@ def get_default_cover(self):
return Image(web.ctx.site, "b", cover_id)


@dataclass
class LoggedBooksData:
"""
LoggedBooksData contains data used for displaying a page of the reading log, such
as the page size for pagination, the docs returned from the reading log DB for
a particular shelf, query, sorting, etc.
param page_size specifies how many results per page should display in the
reading log.
param shelf_totals holds the counts for books on the three default shelves.
param docs holds the documents returned from Solr.
param q holds an optional query string (len >= 3, per my_books_view in mybooks.py)
for filtering the reading log.
param ratings holds a list of ratings such that the index of each rating corresponds
to the index of each doc/work in self.docs.
"""

username: str
page_size: int
total_results: int
shelf_totals: dict[int, int]
docs: list[web.storage]
q: str = ""
ratings: list[int] = field(default_factory=list)

def load_ratings(self) -> None:
"""
Load the ratings into self.ratings from the storage docs, such that the index
of each returned rating corresponds to the index of each web storage doc. This
allows them to be zipped together if needed. E.g. in a template.
The intent of this is so that there is no need to query ratings from the
template, as the docs and ratings are together when needed.
"""
for doc in self.docs:
work_id = extract_numeric_id_from_olid(doc.key)
rating = Ratings.get_users_rating_for_work(self.username, work_id)
self.ratings.append(rating or 0)


def register_models():
client.register_thing_class(None, Thing) # default
client.register_thing_class('/type/edition', Edition)
Expand Down
5 changes: 3 additions & 2 deletions openlibrary/core/ratings.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,13 @@ def get_all_works_ratings(cls, work_id) -> list:
return list(oldb.query(query, vars={'work_id': int(work_id)}))

@classmethod
def get_users_rating_for_work(cls, username, work_id):
def get_users_rating_for_work(cls, username: str, work_id: str | int) -> int | None:
"""work_id must be convertible to int."""
oldb = db.get_db()
data = {'username': username, 'work_id': int(work_id)}
query = 'SELECT * from ratings where username=$username AND work_id=$work_id'
results = list(oldb.query(query, vars=data))
rating = results[0].rating if results else None
rating: int | None = results[0].rating if results else None
return rating

@classmethod
Expand Down
6 changes: 4 additions & 2 deletions openlibrary/macros/StarRatings.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
$def with (work, edition=None, redir_url=None, id='')
$def with (work, edition=None, redir_url=None, id='', rating=None)

$ username = ctx.user and ctx.user.key.split('/')[-1]
$ rating = work and username and work.get_users_rating(username)
$if rating is None:
$ rating = work and username and work.get_users_rating(username)
$ edition_key = edition.key if edition else ""
$ form_id = "ratingsForm%s" % id

Expand Down
Loading

0 comments on commit 6be383e

Please sign in to comment.