Skip to content

Commit

Permalink
v20151026-beta
Browse files Browse the repository at this point in the history
Implement Feature #95: dump url list to text file. Set
writeUrlInDescription = True to enable.
  • Loading branch information
Nandaka committed Oct 26, 2015
1 parent 98b3f58 commit e9eb814
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 1 deletion.
10 changes: 10 additions & 0 deletions PixivConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class PixivConfig:
deleteZipFile = False
enableInfiniteLoop = False
verifyImage = False
writeUrlInDescription = False

# IrfanView
createDownloadLists = False
Expand Down Expand Up @@ -415,6 +416,13 @@ def loadConfig(self, path=None):
self.verifyImage = False
haveError = True

try:
self.writeUrlInDescription = config.getboolean('Settings','writeUrlInDescription')
except ValueError:
print "writeUrlInDescription = False"
self.writeUrlInDescription = False
haveError = True

## except ConfigParser.NoOptionError:
## print 'Error at loadConfig():',sys.exc_info()
## print 'Failed to read configuration.'
Expand Down Expand Up @@ -490,6 +498,7 @@ def writeConfig(self, error=False, path=None):
config.set('Settings', 'deleteZipFile', self.deleteZipFile)
config.set('Settings', 'enableInfiniteLoop', self.enableInfiniteLoop)
config.set('Settings', 'verifyImage', self.verifyImage)
config.set('Settings', 'writeUrlInDescription', self.writeUrlInDescription)

config.add_section('Authentication')
config.set('Authentication', 'username', self.username)
Expand Down Expand Up @@ -585,6 +594,7 @@ def printConfig(self):
print ' - deleteZipFile =', self.deleteZipFile
print ' - enableInfiniteLoop =', self.enableInfiniteLoop
print ' - verifyImage =', self.verifyImage
print ' - writeUrlInDescription =', self.writeUrlInDescription

print ' [Pixiv]'
print ' - numberOfPage =', self.numberOfPage
Expand Down
2 changes: 1 addition & 1 deletion PixivConstant.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: UTF-8 -*-
# pylint: disable=I0011, C, C0302

PIXIVUTIL_VERSION = '20151019'
PIXIVUTIL_VERSION = '20151026-beta'
PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
PIXIV_URL = 'http://www.pixiv.net'
PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'
Expand Down
9 changes: 9 additions & 0 deletions PixivHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,3 +602,12 @@ def generateSearchTagUrl(tags, page, title_caption, wild_card, oldest_first,

return url

def writeUrlInDescription(image):
    """Append the URLs found in an image's description to today's URL list.

    The output file is named ``url_list_<YYYYMMDD>.txt`` using today's date.
    The name is a relative path, so it is resolved against the current
    working directory. The file is opened in append mode as UTF-8, and each
    call adds one ``#<imageId>`` header line followed by one line per URL,
    every line terminated with CRLF.

    Nothing is written (and no file is created) when the image has no
    description URLs.

    Arguments:
    image -- object exposing ``imageId`` and ``descriptionUrlList``.
    """
    if not image.descriptionUrlList:
        return

    filename = "url_list_" + datetime.date.today().strftime("%Y%m%d") + ".txt"
    # Use a context manager so the handle is released even if a write fails;
    # the previous explicit close() was skipped whenever write() raised.
    with codecs.open(filename, 'a', encoding='utf-8') as info:
        info.write("#" + str(image.imageId) + "\r\n")
        for link in image.descriptionUrlList:
            info.write(link + "\r\n")

21 changes: 21 additions & 0 deletions PixivModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import zipfile
import codecs
import collections
import urllib
import PixivHelper
from PixivException import PixivException
from datetime import datetime
Expand Down Expand Up @@ -181,6 +182,7 @@ class PixivImage:
image_response_count = -1
ugoira_data = ""
dateFormat = None
descriptionUrlList = []

def __init__(self, iid=0, page=None, parent=None, fromBookmark=False, bookmark_count=-1, image_response_count=-1, dateFormat = None):
self.artist = parent
Expand All @@ -189,6 +191,7 @@ def __init__(self, iid=0, page=None, parent=None, fromBookmark=False, bookmark_c
self.imageId = iid
self.imageUrls = []
self.dateFormat = dateFormat
self.descriptionUrlList = []

if page != None:
## check is error page
Expand Down Expand Up @@ -319,6 +322,20 @@ def ParseInfo(self, page):
self.jd_rtc = int(page.find(attrs={'class':'rated-count'}).string)
self.jd_rtt = int(page.find(attrs={'class':'score-count'}).string)

descriptionPara = page.findAll("p", attrs={'class':'caption'})
if descriptionPara is not None and len(descriptionPara) > 0:
for para in descriptionPara:
links = para.findAll("a")
if links is not None and len(links) > 0:
for link in links:
link_str = link["href"]
# "/jump.php?http%3A%2F%2Farsenixc.deviantart.com%2Fart%2FWatchmaker-house-567480110"
if link_str.startswith("/jump.php?"):
link_str = link_str[10:]
link_str = urllib.unquote(link_str)
self.descriptionUrlList.append(link_str)


def ParseWorksData(self, page):
temp = page.find(attrs={'class':'meta'}).findAll('li')
#07/22/2011 03:09|512×600|RETAS STUDIO
Expand Down Expand Up @@ -533,6 +550,10 @@ def WriteInfo(self, filename):
info.write("BookmarkCount= " + str(self.bookmark_count) + "\r\n")
info.write("Link = http://www.pixiv.net/member_illust.php?mode=medium&illust_id=" + str(self.imageId) + "\r\n")
info.write("Ugoira Data= " + str(self.ugoira_data) + "\r\n")
if len(self.descriptionUrlList) > 0:
info.write("Urls =\r\n")
for link in self.descriptionUrlList:
info.write(" - " + link + "\r\n")
info.close()

def WriteUgoiraData(self, filename):
Expand Down
2 changes: 2 additions & 0 deletions PixivUtil2.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,8 @@ def process_image(mode, artist=None, image_id=None, user_dir='', bookmark=False,
if __config__.deleteZipFile:
PixivHelper.printAndLog('info', "Deleting zip file => " + filename)
os.remove(filename)
if __config__.writeUrlInDescription:
PixivHelper.writeUrlInDescription(image)


# Only save to db if all images is downloaded completely
Expand Down
5 changes: 5 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
20151026-beta:
- Fix Issue #96: page 100 is not downloaded for new illust page.
- Update test page.
- Implement Feature #95: dump url list to text file. Set writeUrlInDescription = True to enable.

20151019:
- Update proxy handler.
- Add download file verifier (image and ugoira).
Expand Down
4 changes: 4 additions & 0 deletions readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,10 @@ enableInfiniteLoop ==> Enable infinite loop for download by tags.
first).
verifyimage ==> Do image and zip checking after download. Set the value to
True to enable.
writeUrlInDescription ==> Write all URLs found in the image description to a text
file. Set to True to enable. The list will be saved to
the application folder as url_list_<timestamp>.txt (timestamp = date as YYYYMMDD)


=================================================================================
= list.txt Format =
Expand Down

0 comments on commit e9eb814

Please sign in to comment.