diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c908e44827..0127a6508f 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -27,6 +27,8 @@ class PixivExtractor(Extractor):
     filename_fmt = "{id}_p{num}.{extension}"
     archive_fmt = "{id}{suffix}.{extension}"
     cookies_domain = None
+    url_sanity = ("https://s.pximg.net/common/images"
+                  "/limit_sanity_level_360.png")

     def _init(self):
         self.api = PixivAppAPI(self)
@@ -46,8 +48,6 @@ def transform_tags(work):
             def transform_tags(work):
                 work["tags"] = [tag["name"] for tag in work["tags"]]

-        url_sanity = ("https://s.pximg.net/common/images"
-                      "/limit_sanity_level_360.png")
         ratings = {0: "General", 1: "R-18", 2: "R-18G"}
         meta_user = self.config("metadata")
         meta_bookmark = self.config("metadata-bookmark")
@@ -60,11 +60,7 @@ def transform_tags(work):
             if not work["user"]["id"]:
                 continue

-            meta_single_page = work["meta_single_page"]
-            meta_pages = work["meta_pages"]
-            del work["meta_single_page"]
-            del work["image_urls"]
-            del work["meta_pages"]
+            files = self._extract_files(work)

             if meta_user:
                 work.update(self.api.user_detail(work["user"]["id"]))
@@ -81,69 +77,82 @@ def transform_tags(work):
                 work.update(metadata)

             yield Message.Directory, work
+            for work["num"], file in enumerate(files):
+                url = file["url"]
+                work.update(file)
+                work["date_url"] = self._date_from_url(url)
+                yield Message.Url, url, text.nameext_from_url(url, work)

-            if work["type"] == "ugoira":
-                if not self.load_ugoira:
-                    continue
+    def _extract_files(self, work):
+        meta_single_page = work["meta_single_page"]
+        meta_pages = work["meta_pages"]
+        del work["meta_single_page"]
+        del work["image_urls"]
+        del work["meta_pages"]

+        files = []
+        if work["type"] == "ugoira":
+            if self.load_ugoira:
                 try:
-                    ugoira = self.api.ugoira_metadata(work["id"])
+                    self._extract_ugoira(work, files)
                 except exception.StopExtraction as exc:
                     self.log.warning(
-                        "Unable to retrieve Ugoira metatdata (%s - %s)",
-                        work.get("id"), exc.message)
-                    continue
-
-                url = ugoira["zip_urls"]["medium"]
-                work["frames"] = frames = ugoira["frames"]
-                work["date_url"] = self._date_from_url(url)
-                work["_http_adjust_extension"] = False
-
-                if self.load_ugoira == "original":
-                    base, sep, _ = url.rpartition("_ugoira")
-                    base = base.replace(
-                        "/img-zip-ugoira/", "/img-original/", 1) + sep
-
-                    for ext in ("jpg", "png", "gif"):
-                        try:
-                            url = ("{}0.{}".format(base, ext))
-                            self.request(url, method="HEAD")
-                            break
-                        except exception.HttpError:
-                            pass
-                    else:
-                        self.log.warning(
-                            "Unable to find Ugoira frame URLs (%s)",
-                            work.get("id"))
-                        continue
-
-                    for num, frame in enumerate(frames):
-                        url = ("{}{}.{}".format(base, num, ext))
-                        work["num"] = work["_ugoira_frame_index"] = num
-                        work["suffix"] = "_p{:02}".format(num)
-                        text.nameext_from_url(url, work)
-                        yield Message.Url, url, work
-
-                else:
-                    url = url.replace("_ugoira600x600", "_ugoira1920x1080")
-                    yield Message.Url, url, text.nameext_from_url(url, work)
-
-            elif work["page_count"] == 1:
-                url = meta_single_page["original_image_url"]
-                if url == url_sanity:
-                    self.log.warning(
-                        "Unable to download work %s ('sanity_level' warning)",
-                        work["id"])
-                    continue
-                work["date_url"] = self._date_from_url(url)
-                yield Message.Url, url, text.nameext_from_url(url, work)
+                        "Unable to retrieve Ugoira metadata (%s - %s)",
+                        work["id"], exc.message)
+
+        elif work["page_count"] == 1:
+            url = meta_single_page["original_image_url"]
+            if url == self.url_sanity:
+                self.log.warning(
+                    "Unable to download work %s ('sanity_level' warning)",
+                    work["id"])
+            else:
+                files.append({"url": url})

+        else:
+            for num, img in enumerate(meta_pages):
+                files.append({
+                    "url"   : img["image_urls"]["original"],
+                    "suffix": "_p{:02}".format(num),
+                })
+
+        return files
+
+    def _extract_ugoira(self, work, files):
+        ugoira = self.api.ugoira_metadata(work["id"])
+        url = ugoira["zip_urls"]["medium"]
+        work["frames"] = frames = ugoira["frames"]
+        work["date_url"] = self._date_from_url(url)
+        work["_http_adjust_extension"] = False
+
+        if self.load_ugoira == "original":
+            base, sep, _ = url.rpartition("_ugoira")
+            base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep
+
+            for ext in ("jpg", "png", "gif"):
+                try:
+                    url = "{}0.{}".format(base, ext)
+                    self.request(url, method="HEAD")
+                    break
+                except exception.HttpError:
+                    pass
             else:
-                for work["num"], img in enumerate(meta_pages):
-                    url = img["image_urls"]["original"]
-                    work["date_url"] = self._date_from_url(url)
-                    work["suffix"] = "_p{:02}".format(work["num"])
-                    yield Message.Url, url, text.nameext_from_url(url, work)
+                return self.log.warning(
+                    "Unable to find Ugoira frame URLs (%s)", work["id"])
+
+            for num in range(len(frames)):
+                url = "{}{}.{}".format(base, num, ext)
+                files.append(text.nameext_from_url(url, {
+                    "url"   : url,
+                    "num"   : num,
+                    "suffix": "_p{:02}".format(num),
+                    "_ugoira_frame_index": num,
+                }))
+
+        else:
+            files.append({
+                "url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
+            })

     @staticmethod
     def _date_from_url(url, offset=timedelta(hours=9)):
@@ -175,6 +184,9 @@ def _make_work(kind, url, user):
         "x_restrict" : 0,
     }

+    def _web_to_mobile(self, work):
+        return work
+
     def works(self):
         """Return an iterable containing all relevant 'work' objects"""
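
Review note: the heart of this patch is the producer/consumer split. `_extract_files()` now returns plain dicts (a `url` plus optional per-file keys such as `suffix` or `num`), and the loop in `items()` merges each dict into the work metadata before yielding it. A minimal, self-contained sketch of that contract, using a hypothetical sample dict rather than a real Pixiv app-API response:

```python
# Minimal sketch of the files-list contract introduced by this patch.
# The sample "work" dict and URLs are hypothetical stand-ins for Pixiv
# app-API data; extract_files() mirrors _extract_files() for the two
# non-ugoira branches only.

def extract_files(work):
    """Return a list of file dicts: a 'url' plus optional per-file keys."""
    meta_single_page = work.pop("meta_single_page")
    meta_pages = work.pop("meta_pages")

    if work["page_count"] == 1:
        return [{"url": meta_single_page["original_image_url"]}]
    return [{"url"   : img["image_urls"]["original"],
             "suffix": "_p{:02}".format(num)}
            for num, img in enumerate(meta_pages)]


def items(work):
    # Mirrors the new consumer loop: merge each file dict into the work
    # metadata, then yield one (url, metadata) pair per file.
    for work["num"], file in enumerate(extract_files(work)):
        work.update(file)
        yield file["url"], dict(work)


work = {
    "id": 98765, "page_count": 2, "meta_single_page": {},
    "meta_pages": [
        {"image_urls": {"original": "https://i.example/98765_p0.png"}},
        {"image_urls": {"original": "https://i.example/98765_p1.png"}},
    ],
}
for url, meta in items(work):
    print(meta["num"], meta["suffix"], url)
```

Because `work` is updated in place, the consumer loop relies on every file dict carrying all the per-file keys it needs; `text.nameext_from_url()` then fills in `filename` and `extension` from the URL.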
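The original-quality Ugoira path is the one subtle part of `_extract_ugoira()`: it probes frame 0 with HEAD requests under each candidate extension and uses Python's `for`/`else`, whose `else` branch runs only when the loop finishes without a `break`. A standalone sketch of that probe, with `requests` standing in for the extractor's `self.request()` helper and a hypothetical name `probe_frame_ext`:

```python
import requests

def probe_frame_ext(base, candidates=("jpg", "png", "gif")):
    """Return the first extension whose frame-0 URL answers a HEAD request."""
    for ext in candidates:
        try:
            # Probe "{base}0.{ext}"; raise_for_status() turns a 404 into
            # an exception, much like self.request() does in gallery-dl.
            resp = requests.head("{}0.{}".format(base, ext), timeout=10)
            resp.raise_for_status()
            break                   # found a working extension
        except requests.RequestException:
            pass                    # try the next candidate
    else:
        return None                 # no break: every candidate failed
    return ext
```

This also explains the patch's `return self.log.warning(...)`: `Logger.warning()` returns `None`, so that statement logs the failure and exits `_extract_ugoira()` without appending any files, in a single line.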