diff --git a/coldsweat/fetcher.py b/coldsweat/fetcher.py
index 8c70fa1..dc0be4e 100644
--- a/coldsweat/fetcher.py
+++ b/coldsweat/fetcher.py
@@ -241,6 +241,7 @@ def _parse_feed(self, data):
 
             timestamp = t.get_timestamp(default=self.instant)
             content_type, content = t.get_content(('text/plain', ''))
+            thumbnail_url = t.get_thumbnail_url()
 
             entry = {
                 'feed_id': self.feed.id,
@@ -250,6 +251,7 @@ def _parse_feed(self, data):
                 'author': t.get_author() or feed_author,
                 'content': content,
                 'content_type': content_type,
+                'thumbnail_url': thumbnail_url,
                 'published_on': timestamp
             }
             new_entries.append(entry)
@@ -270,7 +272,7 @@ def _parse_feed(self, data):
             # MySQL doesn't support conlict targets, see:
             # https://stackoverflow.com/questions/74691515/python-peewee-using-excluded-to-resolve-conflict-resolution
             count += (Entry.insert_many(batch).on_conflict(
-                # Pass down these new values only for certain values
+                # Pass down these new values only for certain fields
                 preserve=[Entry.title, Entry.author, Entry.content, Entry.content_type])
                 .as_rowcount()
                 .execute())
diff --git a/coldsweat/translators.py b/coldsweat/translators.py
index 7f1a451..09a3b3d 100644
--- a/coldsweat/translators.py
+++ b/coldsweat/translators.py
@@ -34,6 +34,7 @@ def get_title(self):
                             Feed.MAX_TITLE_LENGTH)
         return ''
 
+IMAGE_TYPES = ['image/jpeg', 'image/png', 'image/gif']
 
 class EntryTranslator(object):
 
@@ -87,7 +88,23 @@ def get_content(self, default):
         # app.logger.debug(u'no entry content found, using default')
         return default
 
-    # Nullable fields
+    def get_thumbnail_url(self):
+        """
+        Return the URL of the first image listed in the entry's Media RSS
+        media:content items, or an empty string when there is none.
+
+        See https://www.rssboard.org/media-rss
+        """
+        # Looping (instead of indexing [0]) copes with an empty list and
+        # with entries whose first media:content item is not an image
+        for media_content in self.entry_dict.get('media_content', []):
+            if media_content.get('type') not in IMAGE_TYPES:
+                continue
+            url = media_content.get('url')
+            if url:
+                return url
+        # @@TODO: Try to get thumbnail[0] instead
+        return ''
 
     def get_link(self):
         # Special case for FeedBurner entries