diff --git a/coldsweat/fetcher.py b/coldsweat/fetcher.py index 7eff3dc..442386a 100644 --- a/coldsweat/fetcher.py +++ b/coldsweat/fetcher.py @@ -7,8 +7,7 @@ License: MIT (see LICENSE for details) ''' -import sys, re, time, cgi, urlparse, imp -from os import path +import sys, os, re, time, cgi, urlparse, imp from datetime import datetime from peewee import IntegrityError @@ -18,12 +17,14 @@ from models import * from utilities import * -from filters import escape_html +from filters import escape_html, status_title from coldsweat import * from markup import html -MAX_TITLE_LENGTH = 255 -POSITIVE_STATUS_CODES = 200, 302, 304 # Other redirects are handled by Requests +ENTRY_TAG_URI = 'tag:lab.passiomatic.com,%d:coldsweat:entry:%s' +MAX_TITLE_LENGTH = 255 +POSITIVE_STATUS_CODES = 200, 302, 304 # Other redirects are handled by Requests + # ------------------------------------------------------ # Entry data @@ -198,8 +199,45 @@ def fetch_url(url, timeout=None, etag=None, modified_since=None): return response -def fetch_feed(feed, add_entries=False): + +def add_synthesized_entry(feed, title, content): + ''' + Create an HTML entry for the given feed. + ''' + + now = datetime.utcnow() + + # Since we don't know the mechanism the feed used to build a GUID for its entries + # synthesize a tag URI from the link and a random string. 
This makes + # entries internally generated by Coldsweat reasonably globally unique + try: + nonce = os.urandom(16).encode('base64') + except NotImplementedError: # urandom might not be available on certain platforms + nonce = now.isoformat() + + guid = ENTRY_TAG_URI % (now.year, make_sha1_hash(feed.self_link + nonce)) + + entry = Entry( + guid = guid, + feed = feed, + title = title, + author = 'Coldsweat', + content = content, + #@@TODO: mime_type='text/html', + last_updated_on = now + ) + entry.save() + logger.debug("synthesized entry %s" % guid) + return entry + + +def fetch_feed(feed, add_entries=False): + + def synthesize_entry(reason): + title, content = u'This feed has been disabled', render_template(os.path.join(template_dir, '_entry_feed_disabled.html'), {'reason': reason}) + return add_synthesized_entry(feed, title, content) + def post_fetch(status, error=False): if status: feed.last_status = status @@ -210,6 +248,7 @@ def post_fetch(status, error=False): feed.is_enabled = False feed.last_status = status # Save status code for posterity logger.warn("%s has too many errors, disabled" % netloc) + synthesize_entry('Feed has accomulated too many errors (last was %s).' 
% status_title(status)) feed.save() logger.debug("fetching %s" % feed.self_link) @@ -251,6 +290,7 @@ def post_fetch(status, error=False): else: feed.is_enabled = False logger.warn("new %s location %s is duplicated, disabled" % (netloc, self_link)) + synthesize_entry('Feed has a duplicated web address.') post_fetch(DuplicatedFeedError.code) return @@ -259,8 +299,9 @@ def post_fetch(status, error=False): post_fetch(response.status_code) return elif response.status_code == 410: # Gone - logger.warn("%s is gone, disabled" % netloc) feed.is_enabled = False + logger.warn("%s is gone, disabled" % netloc) + synthesize_entry('Feed has been removed from the origin server.') post_fetch(response.status_code) return elif response.status_code not in POSITIVE_STATUS_CODES: # No good @@ -272,7 +313,7 @@ def post_fetch(status, error=False): # Got parsing error? Log error but do not increment the error counter if hasattr(soup, 'bozo') and soup.bozo: logger.info("%s caused a parser error (%s), tried to parse it anyway" % (netloc, soup.bozo_exception)) - post_fetch(response.status_code, error=False) + post_fetch(response.status_code) feed.etag = response.headers.get('ETag', None) diff --git a/coldsweat/templates/_entry_feed_disabled.html b/coldsweat/templates/_entry_feed_disabled.html new file mode 100644 index 0000000..6a7bf8d --- /dev/null +++ b/coldsweat/templates/_entry_feed_disabled.html @@ -0,0 +1,2 @@ +
{{reason}}
+You can enable it again from the “Feed status” dialog in the Coldsweat web reader.
\ No newline at end of file diff --git a/coldsweat/templates/entry.html b/coldsweat/templates/entry.html index 20465e5..b99b931 100644 --- a/coldsweat/templates/entry.html +++ b/coldsweat/templates/entry.html @@ -32,7 +32,9 @@