Skip to content

Commit

Permalink
v20151001
Browse files Browse the repository at this point in the history
Fix Issue #93: remove popular-introduction from tags search result
  • Loading branch information
Nandaka committed Oct 1, 2015
1 parent a2854ba commit 5c105d8
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 21 deletions.
2 changes: 1 addition & 1 deletion PixivConstant.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: UTF-8 -*-
# pylint: disable=I0011, C, C0302

PIXIVUTIL_VERSION = '20150924'
PIXIVUTIL_VERSION = '20151001'
PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
PIXIV_URL = 'http://www.pixiv.net'
PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'
Expand Down
37 changes: 21 additions & 16 deletions PixivModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,29 +733,35 @@ class PixivTags:
itemList = None
haveImage = None
isLastPage = None
__re_illust = re.compile(r'member_illust.*illust_id=(\d*)')
__re_imageItemClass = re.compile(r".*\bimage-item\b.*")

def parseIgnoreSection(self, page, sectionName):
    '''Collect the illust ids shown inside every <section> of the given class.

    page        -- parsed search page; only its findAll() method is used here.
    sectionName -- value of the section's class attribute to look for
                   (the caller passes 'showcase' and 'popular-introduction').

    Returns a list with the ids (as matched strings) taken from the first
    link of each image-item <li> found in those sections. Items without a
    usable illust link are skipped instead of aborting the whole parse.
    '''
    ignore = list()
    for section in page.findAll('section', attrs={'class': sectionName}):
        for li in section.findAll('li', attrs={'class': self.__re_imageItemClass}):
            # cheap pre-filter kept from the original implementation
            if 'member_illust.php?' not in str(li):
                continue

            # find() returns None when the item contains no link at all
            anchor = li.find('a')
            if anchor is None:
                continue

            # the first link may have no href, or point somewhere else
            href = anchor.get('href')
            if not href:
                continue

            found = self.__re_illust.findall(href)
            if found:
                ignore.append(found[0])
    return ignore

def parseTags(self, page):
'''parse tags search page and return the image list with bookmarkCound and imageResponse'''
self.itemList = list()

__re_illust = re.compile(r'member_illust.*illust_id=(\d*)')

## get showcase
ignore = list()
showcases = page.findAll('section', attrs={'class': 'showcase'})
for showcase in showcases:
lis = showcase.findAll('li', attrs={'class':'image'})
for li in lis:
if str(li).find('member_illust.php?') > -1:
image_id = __re_illust.findall(li.find('a')['href'])[0]
ignore.append(image_id)
# ignore showcase and popular-introduction
ignore.extend(self.parseIgnoreSection(page, 'showcase'))
ignore.extend(self.parseIgnoreSection(page, 'popular-introduction'))

## new parse for bookmark items
imageItemClass = re.compile(r".*\bimage-item\b.*")
items = page.findAll('li', attrs={'class':imageItemClass})
# new parse for bookmark items
items = page.findAll('li', attrs={'class':self.__re_imageItemClass})
for item in items:
if str(item).find('member_illust.php?') > -1:
image_id = __re_illust.findall(item.find('a')['href'])[0]
image_id = self.__re_illust.findall(item.find('a')['href'])[0]
if not str(image_id).isdigit() or image_id in ignore:
continue

Expand All @@ -779,11 +785,10 @@ def parseMemberTags(self, page):
'''parse member tags search page and return the image list'''
self.itemList = list()

__re_illust = re.compile(r'member_illust.*illust_id=(\d*)')
linkList = page.findAll('a')
for link in linkList:
if link.has_key('href') :
result = __re_illust.findall(link['href'])
result = self.__re_illust.findall(link['href'])
if len(result) > 0 :
image_id = int(result[0])
self.itemList.append(PixivTagsItem(int(image_id), 0, 0))
Expand All @@ -805,7 +810,7 @@ def checkLastPage(self, page, fromMember=False):
self.isLastPage = True

if fromMember:
# check if the last page for member tags
# check if the last page for member tags
if self.isLastPage:
check = page.findAll(name='a', attrs={'class':'button', 'rel':'next'})
if len(check) > 0:
Expand Down
11 changes: 7 additions & 4 deletions PixivUtil2.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,13 +665,14 @@ def process_tags(mode, tags, page=1, end_page=0, wild_card=True, title_caption=F
try:
__config__.loadConfig(path=configfile) # Reset the config for root directory

# decode tags.
try:
if tags.startswith("%"):
search_tags = PixivHelper.toUnicode(urllib.unquote_plus(tags))
else:
search_tags = PixivHelper.toUnicode(tags)
except UnicodeDecodeError:
## From command prompt
# From command prompt
search_tags = tags.decode(sys.stdout.encoding).encode("utf8")
search_tags = PixivHelper.toUnicode(search_tags)

Expand All @@ -681,16 +682,17 @@ def process_tags(mode, tags, page=1, end_page=0, wild_card=True, title_caption=F

if not tags.startswith("%"):
try:
## Encode the tags
# Encode the tags
tags = tags.encode('utf-8')
tags = urllib.quote_plus(tags)
except UnicodeDecodeError:
try:
## from command prompt
# from command prompt
tags = urllib.quote_plus(tags.decode(sys.stdout.encoding).encode("utf8"))
except UnicodeDecodeError:
PixivHelper.printAndLog('error', 'Cannot decode the tags, you can use URL Encoder (http://meyerweb.com/eric/tools/dencoder/) and paste the encoded tag.')
__log__.exception('decodeTags()')

i = page
images = 1
last_image_id = -1
Expand Down Expand Up @@ -785,6 +787,7 @@ def process_tags(mode, tags, page=1, end_page=0, wild_card=True, title_caption=F
if __config__.enableInfiniteLoop and i == 1001 and oldest_first == False:
if last_image_id > 0:
# get the last date
PixivHelper.printAndLog('info', "Hit page 1000, trying to get workdate for last image id: " + str(last_image_id))
referer = 'http://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + str(last_image_id)
parse_medium_page = PixivBrowserFactory.getBrowser().getPixivPage(referer)
image = PixivImage(iid=last_image_id, page=parse_medium_page, dateFormat=__config__.dateFormat)
Expand All @@ -794,6 +797,7 @@ def process_tags(mode, tags, page=1, end_page=0, wild_card=True, title_caption=F
i = 1
end_date = _last_date
flag = True
last_image_id = -1
else:
PixivHelper.printAndLog('info', "No more image in the list.")
flag = False
Expand All @@ -813,7 +817,6 @@ def process_tags(mode, tags, page=1, end_page=0, wild_card=True, title_caption=F
PixivHelper.printAndLog('error', 'Cannot dump page for search tags:' + search_tags)
raise


def process_tags_list(mode, filename, page=1, end_page=0, wild_card=True, oldest_first=False, bookmark_count = None):
global ERROR_CODE

Expand Down
3 changes: 3 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
20151001:
- Fix Issue #93: remove popular introduction from tags search result.

20150924:
- Fix Issue #88: undefined variable when handling cookie.
- Externalize date format to config.ini, refer to http://strftime.org/ for formatting syntax.
Expand Down

0 comments on commit 5c105d8

Please sign in to comment.