Merge branch 'master' of https://github.com/unkn0w7n/calibre

kovidgoyal · Sep 30, 2024 · 56eab0e
2 parents 5b17a24 + 812cf96
commit 56eab0e
Show file tree

Hide file tree

Showing 8 changed files with 31 additions and 57 deletions.
diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe
@@ -109,11 +109,12 @@ def parse_cnt(cnt):
  yield ''.join(parse_fmt_type(cnt))
  else:
  for cnt_ in cnt[k]:
- yield from parse_types(cnt_)
+ yield ''.join(parse_types(cnt_))
  if isinstance(cnt[k], dict):
- yield from parse_types(cnt[k])
- if cnt.get('text') and 'formats' not in cnt:
- yield cnt['text']
+ yield ''.join(parse_types(cnt[k]))
+ if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
+ if isinstance(cnt['text'], str):
+ yield cnt['text']
 
 def parse_types(x):
  typename = x.get('__typename', '')
@@ -141,9 +142,6 @@ def parse_types(x):
  elif typename == 'RuleBlock':
  yield '<hr/>'
 
- elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
- yield "".join(parse_types(x['media']))
-
  elif typename == 'Image':
  yield "".join(parse_image(x))
 
@@ -161,23 +159,15 @@ def parse_types(x):
  elif typename == 'ListItemBlock':
  yield f'<li>{"".join(parse_cnt(x))}</li>'
 
- elif typename == 'CapsuleBlock':
- if x['capsuleContent'].get('body'):
- yield "".join(parse_cnt(x['capsuleContent']['body']))
- elif typename == 'Capsule':
- yield "".join(parse_cnt(x['body']))
-
- elif typename in {
- 'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock', 
- 'SummaryBlock', 'VisualStackBlock'
- }:
+ elif typename == 'TextInline':
  yield "".join(parse_cnt(x))
 
+ elif typename in {'DetailBlock', 'TextRunKV'}:
+ yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+
  elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
- if x.get('media'):
- yield "".join(parse_types(x['media']))
- elif "".join(parse_cnt(x)).strip():
- yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+ if "".join(parse_cnt(x)).strip():
+ yield "".join(parse_cnt(x))
 
 def article_parse(data):
  yield "<html><body>"

diff --git a/recipes/science_advances.recipe b/recipes/science_advances.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/recipes/science_journal.recipe b/recipes/science_journal.recipe
@@ -92,8 +92,7 @@ class science(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/recipes/sciimmunol.recipe b/recipes/sciimmunol.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/recipes/scirobotics.recipe b/recipes/scirobotics.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/recipes/scisignaling.recipe b/recipes/scisignaling.recipe
@@ -93,8 +93,7 @@ class scienceadv(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/recipes/scistm.recipe b/recipes/scistm.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
  feeds = []
 
- div = soup.find('div', attrs={'class':'toc__body'})
- for sec in div.findAll('section', **classes('toc__section')):
+ for sec in soup.findAll('section', **classes('toc__section')):
  name = sec.find(**classes('sidebar-article-title--decorated'))
  section = self.tag_to_string(name).strip()
  self.log(section)

diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py
@@ -9,7 +9,7 @@
 
 from calibre.utils.iso8601 import parse_iso8601
 
-module_version = 7 # needed for live updates
+module_version = 8 # needed for live updates
 pprint
 
 
@@ -111,11 +111,12 @@ def parse_cnt(cnt):
  yield ''.join(parse_fmt_type(cnt))
  else:
  for cnt_ in cnt[k]:
- yield from parse_types(cnt_)
+ yield ''.join(parse_types(cnt_))
  if isinstance(cnt[k], dict):
- yield from parse_types(cnt[k])
- if cnt.get('text') and 'formats' not in cnt:
- yield cnt['text']
+ yield ''.join(parse_types(cnt[k]))
+ if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
+ if isinstance(cnt['text'], str):
+ yield cnt['text']
 
 def parse_types(x):
  typename = x.get('__typename', '')
@@ -143,9 +144,6 @@ def parse_types(x):
  elif typename == 'RuleBlock':
  yield '<hr/>'
 
- elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
- yield "".join(parse_types(x['media']))
-
  elif typename == 'Image':
  yield "".join(parse_image(x))
 
@@ -161,25 +159,17 @@ def parse_types(x):
  elif typename == 'ListBlock':
  yield f'<ul>{"".join(parse_cnt(x))}</ul>'
  elif typename == 'ListItemBlock':
- yield f'<li>{"".join(parse_cnt(x))}</li>'
-
- elif typename == 'CapsuleBlock':
- if x['capsuleContent'].get('body'):
- yield "".join(parse_cnt(x['capsuleContent']['body']))
- elif typename == 'Capsule':
- yield "".join(parse_cnt(x['body']))
-
- elif typename in {
- 'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock', 
- 'SummaryBlock', 'VisualStackBlock'
- }:
+ yield f'\n<li>{"".join(parse_cnt(x))}</li>'
+
+ elif typename == 'TextInline':
  yield "".join(parse_cnt(x))
 
+ elif typename in {'DetailBlock', 'TextRunKV'}:
+ yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+
  elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
- if x.get('media'):
- yield "".join(parse_types(x['media']))
- elif "".join(parse_cnt(x)).strip():
- yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+ if "".join(parse_cnt(x)).strip():
+ yield "".join(parse_cnt(x))
 
 def article_parse(data):
  yield "<html><body>"