base_efiction: use stripHTML() instead of .string so that title, author, metadata labels and chapter names containing nested tags are read correctly. Closes #984

JimmXinu · Aug 5, 2023 · 930ba5b · 930ba5b
1 parent fb552c8
commit 930ba5b
Showing 1 changed file with 5 additions and 5 deletions.
diff --git a/fanficfare/adapters/base_efiction_adapter.py b/fanficfare/adapters/base_efiction_adapter.py
@@ -409,9 +409,9 @@ def extractChapterUrlsAndMetadata(self):
         pagetitleDiv = soup.find("div", {"id": "pagetitle"})
         if pagetitleDiv.find('a') is None:
             raise exceptions.FailedToDownload("Couldn't find title and author")
-        self.story.setMetadata('title', pagetitleDiv.find("a").string)
+        self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a")))
         authorLink = pagetitleDiv.findAll("a")[1]
-        self.story.setMetadata('author', authorLink.string)
+        self.story.setMetadata('author', stripHTML(authorLink))
         self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0))
         self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId')))
 
@@ -431,7 +431,7 @@ def extractChapterUrlsAndMetadata(self):
                 else:
                     valueStr += unicode(nextEl)
                 nextEl = nextEl.nextSibling
-            key = labelSpan.string.strip()
+            key = stripHTML(labelSpan)
 
             ## strip trailing colons
             key = re.sub(r"\s*:\s*$", "", key)
@@ -467,8 +467,8 @@ def extractChapterUrlsAndMetadata(self):
                 ## Had a problem with an author putting <b>0.</b> in the text.
                 if chapterLink and chapterLink['href'].startswith('#'):
                     chapterLink['href'] = "%s&chapter=%s" % (self.url, chapterId)
-                    if chapterLink.string != self.getBacktoIndex():
-                        self.add_chapter(chapterLink.string, chapterLink['href'])
+                    if stripHTML(chapterLink) != self.getBacktoIndex():
+                        self.add_chapter(stripHTML(chapterLink), chapterLink['href'])
 
         ## Store reference to soup for getChapterText
         self.html = soup