Skip to content

Commit

Permalink
Allow Entry.updated to be None instead of defaulting to first_updated…
Browse files Browse the repository at this point in the history
… (typing fails).

For #239/#183.
  • Loading branch information
lemon24 committed Oct 21, 2021
1 parent 136a266 commit eb1f96f
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 58 deletions.
3 changes: 0 additions & 3 deletions src/reader/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,6 @@ def convert(key: str) -> Any:
updated = convert('updated_parsed')
published = convert('published_parsed')

if published and not updated and is_rss:
updated, published = published, None

return updated, published


Expand Down
17 changes: 11 additions & 6 deletions src/reader/_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,11 @@ def _get_entries_for_update_n_queries(
context = dict(feed_url=feed_url, id=id)
row = self.db.execute(
"""
SELECT updated, data_hash, data_hash_changed
SELECT
updated,
published,
data_hash,
data_hash_changed
FROM entries
WHERE feed = :feed_url
AND id = :id;
Expand All @@ -645,6 +649,7 @@ def _get_entries_for_update_one_query(
SELECT
entries.id IS NOT NULL,
entries.updated,
entries.published,
entries.data_hash,
entries.data_hash_changed
FROM input
Expand Down Expand Up @@ -1505,15 +1510,15 @@ def make_recent_last_select(id_prefix: str = 'entries.') -> Sequence[Any]:
coalesce (
CASE
WHEN
coalesce(entries.published, entries.updated)
coalesce(entries.published, entries.updated, entries.first_updated)
>= :recent_threshold
THEN entries.first_updated_epoch
END,
entries.published, entries.updated
entries.published, entries.updated, entries.first_updated
)
""",
),
('kinda_published', 'coalesce(published, updated)'),
('kinda_published', 'coalesce(published, updated, first_updated)'),
f'{id_prefix}feed',
'last_updated',
('negative_feed_order', '- feed_order'),
Expand All @@ -1537,7 +1542,7 @@ def make_by_kinda_cte(recent: bool) -> str:
kinda_first_updated = 'first_updated_epoch'
sign = '>='
else:
kinda_first_updated = 'coalesce(published, updated)'
kinda_first_updated = 'coalesce(published, updated, first_updated)'
sign = '<'

return f"""
Expand All @@ -1546,7 +1551,7 @@ def make_by_kinda_cte(recent: bool) -> str:
id,
last_updated,
{kinda_first_updated} as kinda_first_updated,
coalesce(published, updated) as kinda_published,
coalesce(published, updated, first_updated) as kinda_published,
- feed_order as negative_feed_order
FROM entries
WHERE kinda_published {sign} :recent_threshold
Expand Down
5 changes: 4 additions & 1 deletion src/reader/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,10 @@ class EntryForUpdate(NamedTuple):
"""Update-relevant information about an existing entry, from Storage."""

#: The date the entry was last updated, according to the entry.
updated: datetime
updated: Optional[datetime]

#: The date the entry was published, according to the entry.
published: Optional[datetime]

#: The hash of the corresponding EntryData.
hash: Optional[bytes]
Expand Down
48 changes: 18 additions & 30 deletions src/reader/_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,49 +121,37 @@ def should_update_feed(self, new: FeedData, entries_to_update: bool) -> bool:

return False

def compute_entry_updated(
self, id: str, new: Optional[datetime], old: Optional[datetime]
) -> Optional[datetime]:
def should_update_entry(
self, new: EntryData[Optional[datetime]], old: Optional[EntryForUpdate]
) -> Tuple[Optional[EntryData[datetime]], bool]:
def debug(msg: str, *args: Any) -> None:
self.log.debug("entry %r: " + msg, id, *args)

if not new:
if not old:
# FIXME: this should say added
debug("entry updated")
return new, False

new_ref_dt = new.updated or new.published
old_ref_dt = old.updated or old.published

if not new_ref_dt:
debug("has no updated, updating but not changing updated")
debug("entry added/updated")
return old or self.now
return new, False

if self.stale:
debug("feed marked as stale, updating anyway")
debug("entry added/updated")
return new
return new, False

if old and new <= old:
if old_ref_dt and new_ref_dt <= old_ref_dt:
debug(
"entry not updated, skipping (old updated %s, new updated %s)", old, new
)
return None

debug("entry updated")
return new

def should_update_entry(
self, new: EntryData[Optional[datetime]], old: Optional[EntryForUpdate]
) -> Tuple[Optional[EntryData[datetime]], bool]:

updated = self.compute_entry_updated(new.id, new.updated, old and old.updated)

if updated:
return EntryData(**new._replace(updated=updated).__dict__), False

# At this point, new should have .updated is not None.
# otherwise, compute_entry_updated() would have returned
# either old.updated or self.now.
# Remove this when it stops doing that, as proposed in this comment:
# https://github.com/lemon24/reader/issues/179#issuecomment-663840297
assert new.updated is not None

# If old is None, compute_entry_updated() returned something.
assert old is not None
else:
debug("entry updated")
return new, False

# Check if the entry content actually changed:
# https://github.com/lemon24/reader/issues/179
Expand Down
7 changes: 4 additions & 3 deletions tests/data/full.rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
EntryData(
feed_url=feed.url,
id='7bd204c6-1655-4c27-aeee-53f933c5395f',
updated=datetime.datetime(2009, 9, 6, 16, 20),
updated=None,
title='Example entry',
link='http://www.example.com/blog/post/1',
author='Example editor',
published=None,
published=datetime.datetime(2009, 9, 6, 16, 20),
summary='Here is some text containing an interesting description.',
content=(
# the text/plain type comes from feedparser
Expand All @@ -40,7 +40,8 @@
EntryData(
feed_url=feed.url,
id='00000000-1655-4c27-aeee-00000000',
updated=datetime.datetime(2009, 9, 6, 0, 0, 0),
updated=None,
published=datetime.datetime(2009, 9, 6, 0, 0, 0),
title='Example entry, again',
),
]
4 changes: 2 additions & 2 deletions tests/fakeparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _make_feed(number, updated=None, **kwargs):
)


def _make_entry(feed_number, number, updated, **kwargs):
def _make_entry(feed_number, number, updated=None, **kwargs):
return EntryData(
f'{feed_number}',
# evals to tuple
Expand Down Expand Up @@ -51,7 +51,7 @@ def feed(self, number, updated=None, **kwargs):
self.entries.setdefault(number, OrderedDict())
return feed

def entry(self, feed_number, number, updated, **kwargs):
def entry(self, feed_number, number, updated=None, **kwargs):
entry = _make_entry(feed_number, number, updated, **kwargs)
self.entries[feed_number][number] = entry
return entry
Expand Down
66 changes: 54 additions & 12 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,15 +347,7 @@ def test_update_entry_updated(reader, call_update_method, caplog, monkeypatch):
def test_update_no_updated(reader, chunk_size, call_update_method):
"""If a feed has updated == None, it should be treated as updated.
If an entry has updated == None, it should:
* be updated every time, but
* have updated set to the first time it was updated until it has a new
updated != None
This means a stored entry always has updated set.
https://github.com/lemon24/reader/issues/88
If an entry has updated == None, it should be updated every time.
"""
reader._storage.chunk_size = chunk_size
Expand All @@ -374,7 +366,7 @@ def test_update_no_updated(reader, chunk_size, call_update_method):
assert set(reader.get_entries()) == {
entry_one.as_entry(
feed=feed,
updated=datetime(2010, 1, 1),
updated=None,
first_updated=datetime(2010, 1, 1),
last_updated=datetime(2010, 1, 1),
)
Expand All @@ -391,13 +383,11 @@ def test_update_no_updated(reader, chunk_size, call_update_method):
assert set(reader.get_entries()) == {
entry_one.as_entry(
feed=feed,
updated=datetime(2010, 1, 1),
first_updated=datetime(2010, 1, 1),
last_updated=datetime(2010, 1, 2),
),
entry_two.as_entry(
feed=feed,
updated=datetime(2010, 1, 2),
first_updated=datetime(2010, 1, 2),
last_updated=datetime(2010, 1, 2),
),
Expand Down Expand Up @@ -1219,6 +1209,8 @@ def test_get_entries_recent_order(
"""Entries should be sorted descending by (with decreasing priority):
* entry first updated (only if newer than _storage.recent_threshold)
* as of 2021-10 / #239, this test isn't covering this;
test_get_entries_recent_first_updated_order (below) is
* entry published (or entry updated if published is none)
* feed URL
* entry last updated
Expand Down Expand Up @@ -1283,6 +1275,56 @@ def test_get_entries_recent_order(
assert [eval(e.id) for e in call_method(reader)] == [t[:2] for t in expected]


@pytest.mark.parametrize(
'chunk_size',
[
# the default
Storage.chunk_size,
# rough result size for this test
1,
2,
3,
8,
# unchunked query
0,
],
)
@with_call_entries_recent_method
def test_get_entries_recent_first_updated_order(
reader, chunk_size, pre_stuff, call_method
):
"""For entries with no published/updated,
entries should be sorted by first_updated.
"""
reader._storage.chunk_size = chunk_size

parser = Parser()
reader._parser = parser
feed = parser.feed(1, datetime(2010, 1, 1))
reader.add_feed(feed.url)

reader._now = lambda: naive_datetime(2010, 1, 2)
three = parser.entry(1, 3)
reader.update_feeds()

reader._now = lambda: naive_datetime(2010, 1, 4)
two = parser.entry(1, 2)
reader.update_feeds()

reader._now = lambda: naive_datetime(2010, 1, 1)
four = parser.entry(1, 4, datetime(2010, 1, 1))
reader.update_feeds()

reader._now = lambda: naive_datetime(2010, 1, 3)
one = parser.entry(1, 1, datetime(2010, 1, 1))
reader.update_feeds()

pre_stuff(reader)

assert [eval(e.id)[1] for e in call_method(reader)] == [2, 1, 3, 4]


@pytest.mark.parametrize(
'chunk_size',
[
Expand Down
2 changes: 1 addition & 1 deletion tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ def test_get_entries_for_update(storage_cls):
)

assert list(storage.get_entries_for_update([('feed', 'one'), ('feed', 'two')])) == [
EntryForUpdate(datetime(2010, 1, 1), entry.hash, 0),
EntryForUpdate(datetime(2010, 1, 1), None, entry.hash, 0),
None,
]

Expand Down

0 comments on commit eb1f96f

Please sign in to comment.