Fixes for literotica sites changes. Issue #671
JimmXinu committed Mar 19, 2021
1 parent f324c28 commit d6c7064
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions fanficfare/adapters/adapter_literotica.py
@@ -154,13 +154,15 @@ def extractChapterUrlsAndMetadata(self):
             raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
 
         # author
-        a = soup1.find("span", "b-story-user-y")
-        self.story.setMetadata('authorId', urlparse.parse_qs(a.a['href'].split('?')[1])['uid'][0])
-        authorurl = a.a['href']
+        authora = soup1.find("a", class_="y_eU")
+        authorurl = authora['href']
+        # logger.debug(authora)
+        # logger.debug(authorurl)
+        self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
         if authorurl.startswith('//'):
             authorurl = self.parsedUrl.scheme+':'+authorurl
         self.story.setMetadata('authorUrl', authorurl)
-        self.story.setMetadata('author', a.text)
+        self.story.setMetadata('author', authora.text)
 
         # get the author page
         dataAuth = self.get_request(authorurl)
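
For context on this first hunk: the author block moved from the old b-story-user-y span to an anchor with class y_eU, and the author id is now parsed out of that link's query string. A minimal sketch of the lookup in isolation, using invented markup that only borrows the y_eU class and the uid parameter from the diff (the URL path is a guess; the adapter itself gets parse_qs through its Python 2/3 compatibility imports):

from urllib.parse import parse_qs

from bs4 import BeautifulSoup

# Invented sample of the new author link; only the "y_eU" class and the
# "uid" query parameter are taken from the diff above.
html = '<a class="y_eU" href="//www.literotica.com/memberpage.php?uid=12345">SomeAuthor</a>'
soup1 = BeautifulSoup(html, "html.parser")

authora = soup1.find("a", class_="y_eU")
authorurl = authora["href"]
author_id = parse_qs(authorurl.split("?")[1])["uid"][0]  # -> '12345'

# Literotica serves protocol-relative hrefs, so a scheme must be
# prepended before the URL can be fetched.
if authorurl.startswith("//"):
    authorurl = "https:" + authorurl
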
@@ -232,7 +234,7 @@ def extractChapterUrlsAndMetadata(self):
             chapterLink = chapterTr.find("td", "fc").find("a")
             if self.getConfig('chapter_categories_use_all'):
                 self.story.addToList('category', chapterTr.findAll("td")[2].text)
-            self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
+            # self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
             pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
             dates.append(pub_date)
             chapterTr = chapterTr.nextSibling
@@ -307,7 +309,7 @@ def extractChapterUrlsAndMetadata(self):
 
 
         # Add the category from the breadcumb. This might duplicate a category already added.
-        self.story.addToList('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
+        self.story.addToList('category', soup1.find('div', id='BreadCrumbComponent').findAll('a')[1].string)
         self.getCategories(soup1)
 
         return
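
The breadcrumb lookup above is the same kind of selector swap, moving from a class ('b-breadcrumbs') to an element id ('BreadCrumbComponent'). A one-off sketch with an invented crumb trail, where the [1] index presumably skips a leading "Home" link:

from bs4 import BeautifulSoup

# Invented breadcrumbs; only the "BreadCrumbComponent" id is from the diff.
html = ('<div id="BreadCrumbComponent">'
        '<a href="/">Home</a><a href="/c/romance">Romance</a></div>')
soup1 = BeautifulSoup(html, "html.parser")

category = soup1.find('div', id='BreadCrumbComponent').findAll('a')[1].string  # -> 'Romance'
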
@@ -320,7 +322,7 @@ def getPageText(self, raw_page, url):
         # logger.debug("\tChapter text: %s" % raw_page)
         page_soup = self.make_soup(raw_page)
         [comment.extract() for comment in page_soup.findAll(text=lambda text:isinstance(text, Comment))]
-        story2 = page_soup.find('div', 'b-story-body-x').div
+        story2 = page_soup.find('div', 'aa_ht').div
         # logger.debug('getPageText - story2: %s' % story2)
 
         fullhtml = unicode(story2)
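
The chapter-text extraction in getPageText changes the same way: the retired b-story-body-x wrapper is replaced by a div of class aa_ht whose first child div carries the paragraphs. A rough, self-contained illustration (the sample markup is invented around the class name in the diff):

from bs4 import BeautifulSoup

# Invented chapter-body markup; only the "aa_ht" class comes from the diff.
raw_page = '<div class="aa_ht"><div><p>First paragraph.</p><p>Second.</p></div></div>'
page_soup = BeautifulSoup(raw_page, "html.parser")

# .div descends to the first nested <div>, mirroring the adapter's lookup.
story2 = page_soup.find('div', 'aa_ht').div
fullhtml = str(story2)  # the adapter calls unicode() for Python 2 compatibility
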
@@ -338,8 +340,7 @@ def getChapterText(self, url):
 
         raw_page = self.get_request(url)
         page_soup = self.make_soup(raw_page)
-        pages = page_soup.find('select', {'name' : 'page'})
-        page_nums = [page.text for page in pages.findAll('option')] if pages else 0
+        pages = page_soup.find('div',class_='l_bH')
 
         fullhtml = ""
         self.getCategories(page_soup)
@@ -350,7 +351,13 @@ def getChapterText(self, url):
             chapter_description = '<p><b>Description:</b> %s</p><hr />' % chapter_description
         fullhtml += self.getPageText(raw_page, url)
         if pages:
-            for page_no in range(2, len(page_nums) + 1):
+            ## look for highest numbered page, they're not all listed
+            ## when there are many.
+
+            last_page_link = pages.find_all('a', class_='l_bJ')[-1]
+            last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0])
+            # logger.debug(last_page_no)
+            for page_no in range(2, last_page_no+1):
                 page_url = url + "?page=%s" % page_no
                 # logger.debug("page_url= %s" % page_url)
                 raw_page = self.get_request(page_url)
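
This paging rewrite is the substantive fix. The old code counted the <option> entries of a page <select> to learn the page count; the redesigned pager (div.l_bH) instead shows a truncated run of links when a story has many pages, so counting elements would undercount. The new code therefore reads the page number out of the last link's query string. A standalone sketch with an invented, gappy pager (only the l_bH / l_bJ class names and the page parameter come from the diff):

from urllib.parse import parse_qs

from bs4 import BeautifulSoup

# Invented pager: pages 2 and 3 are listed, then a gap, then the true last page.
raw_page = """
<div class="l_bH">
  <a class="l_bJ" href="/s/some-story?page=2">2</a>
  <a class="l_bJ" href="/s/some-story?page=3">3</a>
  <a class="l_bJ" href="/s/some-story?page=17">17</a>
</div>
"""
page_soup = BeautifulSoup(raw_page, "html.parser")

pages = page_soup.find('div', class_='l_bH')
if pages:
    last_page_link = pages.find_all('a', class_='l_bJ')[-1]
    last_page_no = int(parse_qs(last_page_link['href'].split('?')[1])['page'][0])
    # Counting the three links would miss pages 4-16; the last href says 17.
    page_urls = ["/s/some-story?page=%s" % n for n in range(2, last_page_no + 1)]
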
