Skip to content

Commit

Permalink
Merge pull request #1064 from dbhmw/main
Browse files Browse the repository at this point in the history
Ficbook.net More metadata collection
  • Loading branch information
JimmXinu authored Apr 24, 2024
2 parents d6f2faf + 0bb8421 commit 65bf03a
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 31 deletions.
19 changes: 19 additions & 0 deletions calibre-plugin/plugin-defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1932,6 +1932,25 @@ add_to_output_css:
white-space: pre-wrap;
}

# 'collections' may generate a lot of requests if the work is in many collections. For every 30 collections, an additional request will be made.
extra_valid_entries:dedication,authorcomment,likes,follows,reviews,bookmarks,numcollections,pages,collections

dedication_label:Dedication
authorcomment_label:Author Comment
likes_label:Likes
follows_label:Follows
reviews_label:Reviews
bookmarks_label:Bookmarks
numcollections_label:Collections
pages_label:Pages

# add_to_titlepage_entries:, likes, follows, reviews, bookmarks, numcollections, pages
add_to_wide_titlepage_entries:,dedication, authorcomment

# exclude_notes: headnotes,footnotes

add_to_comma_entries:,likes,follows,reviews,bookmarks

[fiction.live]
## Recommended if you include images, fiction.live tends to have many
## duplicated images.
Expand Down
129 changes: 98 additions & 31 deletions fanficfare/adapters/adapter_ficbooknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,72 @@ def extractChapterUrlsAndMetadata(self):
self.story.addToList('characters',stripHTML(paira))

summary=soup.find('div', itemprop='description')
self.setDescription(url,summary)
#self.story.setMetadata('description', summary.text)
# To get rid of an empty div on the title page.
if summary.get_text():
self.setDescription(url,summary)
#self.story.setMetadata('description', summary.text)

stats = soup.find('div', {'class' : 'mb-15 text-center'})
targetdata = stats.find_all('span', {'class' : 'main-info'})
for data in targetdata:
svg_class = data.find('svg')['class'][0] if data.find('svg') else None
value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0

if svg_class == 'ic_thumbs-up' and value > 0:
self.story.setMetadata('likes', value)
elif svg_class == 'ic_bubble-dark' and value > 0:
self.story.setMetadata('reviews', value)
elif svg_class == 'ic_bookmark' and value > 0:
self.story.setMetadata('bookmarks', value)

follows = int(stats.find('fanfic-follow-button')[':follow-count'])
if follows > 0:
self.story.setMetadata('follows', follows)

collection = soup.find('fanfic-collections-link').find_parent('div')
if collection:
num_collections = int(collection.find('fanfic-collections-link')[':initial-count'])
if num_collections > 0:
self.story.setMetadata('numcollections', num_collections)
if "collections" in self.getConfigList('extra_valid_entries'):
collUrl = 'https://' + self.getSiteDomain() + soup.find('fanfic-collections-link')['url']
p = self.get_request(collUrl)
soupColl = self.make_soup(p)
targetcoll = soupColl.find_all('div', {'class' : 'collection-thumb-info'})
for coll in targetcoll:
o = coll.find('a', href=re.compile(r'/collections/'))
self.story.addToList('collections', stripHTML(o))

if soupColl.find('div', {'class' : 'paging-description'}):
collpg = soupColl.find('div', {'class' : 'paging-description'}).select_one('div.paging-description b:last-child').text
print(collpg)
for c in range(int(collpg), 1, -1):
soupColl = self.make_soup(self.get_request(collUrl + '?p=' + str(c)))
targetcoll = soupColl.find_all('div', {'class' : 'collection-thumb-info'})
for coll in targetcoll:
o = coll.find('a', href=re.compile(r'/collections/'))
self.story.addToList('collections', stripHTML(o))
if self.getMetadata('collections') != num_collections:
logger.debug("Collections mismatch: (" + self.story.getMetadata('collections') + '/' + num_collections)

logger.debug("Collections: (%s)"%self.story.getMetadata('collections'))


targetpages = soup.find('strong',string='Размер:').find_next('div')
if targetpages:
pages = re.findall(r'([\d,]+)\s+страницы', targetpages.text)
self.story.setMetadata('pages', pages)

# Find dedication.
ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
if ded != None:
self.story.setMetadata('dedication',stripHTML(ded))

# Find author comment
comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
if comm != None:
self.story.setMetadata('authorcomment',stripHTML(comm))


# grab the text for an individual chapter.
def getChapterText(self, url):
Expand All @@ -241,34 +305,37 @@ def getChapterText(self, url):
for ads in chapter.find_all('div', {'class' : 'ads-in-text'}):
ads.extract()

# Find the headnote
head_note = soup.find('div', {'class': 'part-comment-top'})
if head_note:
head_notes_content = head_note.find('div', {'class': 'js-public-beta-comment-before'}).get_text(strip=True)
# Create the structure for the headnote
head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
head_b_tag = soup.new_tag('b')
head_b_tag.string = 'Примечания:'
head_blockquote_tag = soup.new_tag('blockquote')
head_blockquote_tag.string = head_notes_content
head_notes_div_tag.append(head_b_tag)
head_notes_div_tag.append(head_blockquote_tag)
# Prepend the headnotes to the chapter
chapter.insert(0, head_notes_div_tag)

# Find the endnote
end_note = soup.find('div', {'class': 'part-comment-bottom'})
if end_note:
end_notes_content = end_note.find('div', {'class': 'js-public-beta-comment-after'}).get_text(strip=True)
# Create the structure for the footnote
end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
end_b_tag = soup.new_tag('b')
end_b_tag.string = 'Примечания:'
end_blockquote_tag = soup.new_tag('blockquote')
end_blockquote_tag.string = end_notes_content
end_notes_div_tag.append(end_b_tag)
end_notes_div_tag.append(end_blockquote_tag)
# Append the endnotes to the chapter
chapter.append(end_notes_div_tag)
exclude_notes=self.getConfigList('exclude_notes')
if 'headnotes' not in exclude_notes:
# Find the headnote
head_note = soup.find('div', {'class': 'part-comment-top'})
if head_note:
head_notes_content = head_note.find('div', {'class': 'js-public-beta-comment-before'}).get_text(strip=True)
# Create the structure for the headnote
head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
head_b_tag = soup.new_tag('b')
head_b_tag.string = 'Примечания:'
head_blockquote_tag = soup.new_tag('blockquote')
head_blockquote_tag.string = head_notes_content
head_notes_div_tag.append(head_b_tag)
head_notes_div_tag.append(head_blockquote_tag)
# Prepend the headnotes to the chapter
chapter.insert(0, head_notes_div_tag)

if 'footnotes' not in exclude_notes:
# Find the endnote
end_note = soup.find('div', {'class': 'part-comment-bottom'})
if end_note:
end_notes_content = end_note.find('div', {'class': 'js-public-beta-comment-after'}).get_text(strip=True)
# Create the structure for the footnote
end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
end_b_tag = soup.new_tag('b')
end_b_tag.string = 'Примечания:'
end_blockquote_tag = soup.new_tag('blockquote')
end_blockquote_tag.string = end_notes_content
end_notes_div_tag.append(end_b_tag)
end_notes_div_tag.append(end_blockquote_tag)
# Append the endnotes to the chapter
chapter.append(end_notes_div_tag)

return self.utf8FromSoup(url,chapter)
19 changes: 19 additions & 0 deletions fanficfare/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1927,6 +1927,25 @@ add_to_output_css:
white-space: pre-wrap;
}

# 'collections' may generate a lot of requests if the work is in many collections. For every 30 collections, an additional request will be made.
extra_valid_entries:dedication,authorcomment,likes,follows,reviews,bookmarks,numcollections,pages,collections

dedication_label:Dedication
authorcomment_label:Author Comment
likes_label:Likes
follows_label:Follows
reviews_label:Reviews
bookmarks_label:Bookmarks
numcollections_label:Collections
pages_label:Pages

# add_to_titlepage_entries:, likes, follows, reviews, bookmarks, numcollections, pages
add_to_wide_titlepage_entries:,dedication, authorcomment

# exclude_notes: headnotes,footnotes

add_to_comma_entries:,likes,follows,reviews,bookmarks

[fiction.live]
## Recommended if you include images, fiction.live tends to have many
## duplicated images.
Expand Down

0 comments on commit 65bf03a

Please sign in to comment.