Skip to content

Commit

Permalink
base_efiction: replace .string with stripHTML() so text inside nested tags is extracted. Closes #984
Browse files (browse the repository at this point in the history)
  • Loading branch information
JimmXinu committed Aug 5, 2023
1 parent fb552c8 commit 930ba5b
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions fanficfare/adapters/base_efiction_adapter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -409,9 +409,9 @@ def extractChapterUrlsAndMetadata(self):
pagetitleDiv = soup.find("div", {"id": "pagetitle"})
if pagetitleDiv.find('a') is None:
raise exceptions.FailedToDownload("Couldn't find title and author")
- self.story.setMetadata('title', pagetitleDiv.find("a").string)
+ self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a")))
authorLink = pagetitleDiv.findAll("a")[1]
- self.story.setMetadata('author', authorLink.string)
+ self.story.setMetadata('author', stripHTML(authorLink))
self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0))
self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId')))

Expand All @@ -431,7 +431,7 @@ def extractChapterUrlsAndMetadata(self):
else:
valueStr += unicode(nextEl)
nextEl = nextEl.nextSibling
- key = labelSpan.string.strip()
+ key = stripHTML(labelSpan)

## strip trailing colons
key = re.sub(r"\s*:\s*$", "", key)
Expand Down Expand Up @@ -467,8 +467,8 @@ def extractChapterUrlsAndMetadata(self):
## Had a problem with an author putting <b>0.</b> in the text.
if chapterLink and chapterLink['href'].startswith('#'):
chapterLink['href'] = "%s&chapter=%s" % (self.url, chapterId)
- if chapterLink.string != self.getBacktoIndex():
- self.add_chapter(chapterLink.string, chapterLink['href'])
+ if stripHTML(chapterLink) != self.getBacktoIndex():
+ self.add_chapter(stripHTML(chapterLink), chapterLink['href'])

## Store reference to soup for getChapterText
self.html = soup
Expand Down

0 comments on commit 930ba5b

Please sign in to comment.