Skip to content

Commit

Permalink
[pixiv] move file/ugoira extraction into a separate function
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Oct 4, 2024
1 parent 3fa639f commit d1432d0
Showing 1 changed file with 74 additions and 62 deletions.
136 changes: 74 additions & 62 deletions gallery_dl/extractor/pixiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class PixivExtractor(Extractor):
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
cookies_domain = None
url_sanity = ("https://s.pximg.net/common/images"
"/limit_sanity_level_360.png")

def _init(self):
self.api = PixivAppAPI(self)
Expand All @@ -46,8 +48,6 @@ def transform_tags(work):
def transform_tags(work):
work["tags"] = [tag["name"] for tag in work["tags"]]

url_sanity = ("https://s.pximg.net/common/images"
"/limit_sanity_level_360.png")
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
meta_user = self.config("metadata")
meta_bookmark = self.config("metadata-bookmark")
Expand All @@ -60,11 +60,7 @@ def transform_tags(work):
if not work["user"]["id"]:
continue

meta_single_page = work["meta_single_page"]
meta_pages = work["meta_pages"]
del work["meta_single_page"]
del work["image_urls"]
del work["meta_pages"]
files = self._extract_files(work)

if meta_user:
work.update(self.api.user_detail(work["user"]["id"]))
Expand All @@ -81,69 +77,82 @@ def transform_tags(work):
work.update(metadata)

yield Message.Directory, work
for work["num"], file in enumerate(files):
url = file["url"]
work.update(file)
work["date_url"] = self._date_from_url(url)
yield Message.Url, url, text.nameext_from_url(url, work)

if work["type"] == "ugoira":
if not self.load_ugoira:
continue
def _extract_files(self, work):
meta_single_page = work["meta_single_page"]
meta_pages = work["meta_pages"]
del work["meta_single_page"]
del work["image_urls"]
del work["meta_pages"]

files = []
if work["type"] == "ugoira":
if self.load_ugoira:
try:
ugoira = self.api.ugoira_metadata(work["id"])
self._extract_ugoira(work, files)
except exception.StopExtraction as exc:
self.log.warning(
"Unable to retrieve Ugoira metatdata (%s - %s)",
work.get("id"), exc.message)
continue

url = ugoira["zip_urls"]["medium"]
work["frames"] = frames = ugoira["frames"]
work["date_url"] = self._date_from_url(url)
work["_http_adjust_extension"] = False

if self.load_ugoira == "original":
base, sep, _ = url.rpartition("_ugoira")
base = base.replace(
"/img-zip-ugoira/", "/img-original/", 1) + sep

for ext in ("jpg", "png", "gif"):
try:
url = ("{}0.{}".format(base, ext))
self.request(url, method="HEAD")
break
except exception.HttpError:
pass
else:
self.log.warning(
"Unable to find Ugoira frame URLs (%s)",
work.get("id"))
continue

for num, frame in enumerate(frames):
url = ("{}{}.{}".format(base, num, ext))
work["num"] = work["_ugoira_frame_index"] = num
work["suffix"] = "_p{:02}".format(num)
text.nameext_from_url(url, work)
yield Message.Url, url, work

else:
url = url.replace("_ugoira600x600", "_ugoira1920x1080")
yield Message.Url, url, text.nameext_from_url(url, work)

elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
if url == url_sanity:
self.log.warning(
"Unable to download work %s ('sanity_level' warning)",
work["id"])
continue
work["date_url"] = self._date_from_url(url)
yield Message.Url, url, text.nameext_from_url(url, work)
work["id"], exc.message)

elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
if url == self.url_sanity:
self.log.warning(
"Unable to download work %s ('sanity_level' warning)",
work["id"])
else:
files.append({"url": url})

else:
for num, img in enumerate(meta_pages):
files.append({
"url" : img["image_urls"]["original"],
"suffix": "_p{:02}".format(num),
})

return files

def _extract_ugoira(self, work, files):
    """Append downloadable ugoira entries to 'files'.

    Fetches ugoira metadata from the API and either appends per-frame
    original image URLs (when 'load_ugoira' == "original") or the
    single 1920x1080 ZIP archive URL. Mutates 'work' in place with
    frame data and download hints.
    """
    ugoira = self.api.ugoira_metadata(work["id"])
    zip_url = ugoira["zip_urls"]["medium"]
    work["frames"] = frames = ugoira["frames"]
    work["date_url"] = self._date_from_url(zip_url)
    # keep the extension as-is; don't let the downloader rewrite it
    work["_http_adjust_extension"] = False

    if self.load_ugoira != "original":
        # single ZIP archive, upgraded to the 1920x1080 variant
        files.append({
            "url": zip_url.replace(
                "_ugoira600x600", "_ugoira1920x1080", 1),
        })
        return

    prefix, sep, _ = zip_url.rpartition("_ugoira")
    prefix = prefix.replace(
        "/img-zip-ugoira/", "/img-original/", 1) + sep

    # probe frame 0 with HEAD requests to discover the original
    # frames' file extension
    ext = None
    for candidate in ("jpg", "png", "gif"):
        probe = "{}0.{}".format(prefix, candidate)
        try:
            self.request(probe, method="HEAD")
        except exception.HttpError:
            continue
        ext = candidate
        break
    if ext is None:
        return self.log.warning(
            "Unable to find Ugoira frame URLs (%s)", work["id"])

    for index in range(len(frames)):
        frame_url = "{}{}.{}".format(prefix, index, ext)
        files.append(text.nameext_from_url(frame_url, {
            "url": frame_url,
            "num": index,
            "suffix": "_p{:02}".format(index),
            "_ugoira_frame_index": index,
        }))

@staticmethod
def _date_from_url(url, offset=timedelta(hours=9)):
Expand Down Expand Up @@ -175,6 +184,9 @@ def _make_work(kind, url, user):
"x_restrict" : 0,
}

def _web_to_mobile(self, work):
    # Identity pass-through. NOTE(review): presumably a hook that
    # subclasses override to convert a web-API 'work' dict into the
    # mobile/app-API shape used by the rest of the extractor — confirm
    # against the overriding subclasses (not visible in this chunk).
    return work

def works(self):
"""Return an iterable containing all relevant 'work' objects"""

Expand Down

0 comments on commit d1432d0

Please sign in to comment.