Skip to content

Commit

Permalink
merge #6607: [lofter] add initial support
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikf committed Dec 11, 2024
2 parents 0e942f0 + 717081d commit 63008f7
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,12 @@ Consider all listed sites to potentially be NSFW.
<td>Blogs, Posts</td>
<td></td>
</tr>
<tr>
<td>LOFTER</td>
<td>https://www.lofter.com/</td>
<td>Blog Posts, Posts</td>
<td></td>
</tr>
<tr>
<td>Luscious</td>
<td>https://members.luscious.net/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
"lexica",
"lightroom",
"livedoor",
"lofter",
"luscious",
"lynxchan",
"mangadex",
Expand Down
147 changes: 147 additions & 0 deletions gallery_dl/extractor/lofter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.lofter.com/"""

from .common import Extractor, Message
from .. import text, util, exception


class LofterExtractor(Extractor):
    """Shared base for every lofter extractor

    Subclasses provide the posts to handle via posts(); items() emits a
    directory message per post, followed by one URL message for each
    media file found in it.
    """
    category = "lofter"
    root = "https://www.lofter.com"
    directory_fmt = ("{category}", "{blog_name}")
    filename_fmt = "{id}_{num}.{extension}"
    archive_fmt = "{id}_{num}"

    def _init(self):
        self.api = LofterAPI(self)

    def items(self):
        """Yield Directory and Url messages for all posts from posts()"""
        for entry in self.posts():
            # some results wrap the actual post data in a "post" key
            post = entry["post"] if "post" in entry else entry

            post["blog_name"] = post["blogInfo"]["blogName"]
            # presumably 'publishTime' is in milliseconds; it gets reduced
            # by a factor of 1000 before being parsed as a timestamp
            post["date"] = text.parse_timestamp(post["publishTime"] // 1000)

            media_urls = self._media_urls(post)
            post["count"] = len(media_urls)

            yield Message.Directory, post
            for post["num"], url in enumerate(media_urls, 1):
                yield Message.Url, url, text.nameext_from_url(url, post)

    def _media_urls(self, post):
        """Collect the media URLs of 'post' according to its type"""
        kind = post["type"]

        if kind == 1:
            # Article: images are embedded in the HTML content
            sources = text.extract_iter(post["content"], '<img src="', '"')
            return [text.unescape(src).partition("?")[0] for src in sources]

        if kind == 2:
            # Photo
            return self._original_urls(post["photoLinks"])

        if kind == 4:
            # Video: the URL is used as is, including its query string
            return [util.json_loads(post["embed"])["originUrl"]]

        if kind == 5:
            # Answer
            return self._original_urls(post["images"])

        self.log.warning(
            "%s: Unsupported post type '%s'.",
            post["id"], kind)
        return ()

    @staticmethod
    def _original_urls(payload):
        """Return the query-less "orign" URLs from a JSON-encoded list"""
        return [
            item["orign"].partition("?")[0]
            for item in util.json_loads(payload)
        ]

    def posts(self):
        """Return the posts to process; overridden by subclasses"""
        return ()


class LofterPostExtractor(LofterExtractor):
    """Extractor for a single post on a lofter blog"""
    subcategory = "post"
    pattern = r"(?:https?://)?[\w-]+\.lofter\.com/post/([0-9a-f]+)_([0-9a-f]+)"
    example = "https://BLOG.lofter.com/post/12345678_90abcdef"

    def posts(self):
        """Return a 1-tuple holding the post referenced by the URL"""
        # the URL carries both IDs as hexadecimal strings;
        # they are converted to integers before being sent to the API
        blog_hex, post_hex = self.groups
        blog_id = int(blog_hex, 16)
        post_id = int(post_hex, 16)
        return (self.api.post(blog_id, post_id),)


class LofterBlogPostsExtractor(LofterExtractor):
    """Extractor for all posts of a lofter blog"""
    subcategory = "blog-posts"
    pattern = (r"(?:https?://)?(?:"
               # https://www.lofter.com/front/blog/home-page/<blog_name>
               r"www\.lofter\.com/front/blog/home-page/([\w-]+)|"
               # https://<blog_name>.lofter.com/
               r"([\w-]+)\.lofter\.com"
               r")/?(?:$|\?|#)")
    example = "https://BLOG.lofter.com/"

    def posts(self):
        """Return the posts of the blog named in the URL"""
        # only one of the two URL forms matches, so the name comes from
        # whichever capture group is non-empty
        from_home_page, from_subdomain = self.groups
        return self.api.blog_posts(from_home_page or from_subdomain)


class LofterAPI():
    """Minimal client for the lofter API at https://api.lofter.com

    All requests are sent through the request() method of the extractor
    given at construction time.
    """

    def __init__(self, extractor):
        self.extractor = extractor

    def blog_posts(self, blog_name):
        """Return a generator over the posts of the blog 'blog_name'"""
        endpoint = "/v2.0/blogHomePage.api"
        params = {
            "method": "getPostLists",
            "offset": 0,
            "limit": 200,
            "blogdomain": blog_name + ".lofter.com",
        }
        return self._pagination(endpoint, params)

    def post(self, blog_id, post_id):
        """Return the data of the post 'post_id' from the blog 'blog_id'"""
        endpoint = "/oldapi/post/detail.api"
        params = {
            "targetblogid": blog_id,
            "postid": post_id,
        }
        return self._call(endpoint, params)["posts"][0]

    def _call(self, endpoint, data):
        """POST 'data' to 'endpoint' and return the "response" payload

        Raises exception.StopExtraction when the status reported in the
        response's "meta" object is anything other than 200.
        """
        url = "https://api.lofter.com" + endpoint
        params = {
            'product': 'lofter-android-7.9.10'
        }
        response = self.extractor.request(
            url, method="POST", params=params, data=data)
        info = response.json()

        if info["meta"]["status"] != 200:
            self.extractor.log.debug("Server response: %s", info)
            raise exception.StopExtraction("API request failed")

        return info["response"]

    def _pagination(self, endpoint, params):
        """Yield posts from 'endpoint', following the returned offsets

        'params' is updated in place with the offset of the next page.
        """
        while True:
            data = self._call(endpoint, params)
            posts = data["posts"]

            yield from posts

            next_offset = data["offset"]
            if params["offset"] + len(posts) < next_offset:
                break
            # An offset that does not advance (e.g. an empty last page or
            # a negative value) would make the next request identical to
            # this one and the loop would never terminate.
            if next_offset <= params["offset"]:
                break
            params["offset"] = next_offset
4 changes: 4 additions & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
"kemonoparty" : "Kemono",
"koharu" : "SchaleNetwork",
"livedoor" : "livedoor Blog",
"lofter" : "LOFTER",
"ohpolly" : "Oh Polly",
"omgmiamiswimwear": "Omg Miami Swimwear",
"mangadex" : "MangaDex",
Expand Down Expand Up @@ -266,6 +267,9 @@
"lensdump": {
"albums": "",
},
"lofter": {
"blog-posts": "Blog Posts",
},
"mangadex": {
"feed" : "Followed Feed",
},
Expand Down
59 changes: 59 additions & 0 deletions test/results/lofter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import lofter


# Result definitions for the lofter extractors.
# Keys starting with "#" appear to be directives for the test runner
# (URL, expected extractor class, expected file URLs, ...); the remaining
# keys are expected metadata values -- TODO confirm against the runner.
__tests__ = (
# single post with seven image files
{
"#url" : "https://gengar563.lofter.com/post/1e82da8c_1c98dae1b",
"#class": lofter.LofterPostExtractor,
"#urls" : (
"https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJQ1RxY0lYaU1UUE9tQ0NvUE9rVXFpOFFEVzMwbnQ4aEFnPT0.jpg",
"https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJRWlXYTRVOEpXTU9TSGt3TjBDQ0JFZVpZMEJtWjFneVNBPT0.png",
"https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJR1d3Y2VvbTNTQlIvdFU1WWlqZHEzbjI4MFVNZVdoN3VBPT0.png",
"https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJTi83NDRDUjNvd3hySGxEZFovd2hwbi9oaG9NQ1hOUkZ3PT0.png",
"https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png",
"https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSStJZE9RYnJURktHazdIVHNNMjQ5eFJldHVTQy9XbDB3PT0.png",
"https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png",
),

"blog_name": "gengar563",
"content" : "<p>发了三次发不出有毒……</p> \n<p>二部运动au&nbsp;&nbsp;性转ac注意</p> \n<p>失去耐心.jpg</p>",
"date" : "dt:2020-06-04 12:51:42",
"id" : 7676472859,
},

# video post; the expected URL keeps its query string
{
"#url" : "https://wooden-brain.lofter.com/post/1e60de5b_1c9bf8efb",
"#comment": "video",
"#class" : lofter.LofterPostExtractor,
"#urls" : (
"https://vodm2lzexwq.vod.126.net/vodm2lzexwq/Pc5jg1nL_3039990631_sd.mp4?resId=254486990bfa2cd7aa860229db639341_3039990631_1&sign=4j02HTHXqNfhaF%2B%2FO14Ny%2F9SMNZj%2FIjpJDCqXfYa4aM%3D",
),

"blog_name": "wooden-brain",
"date" : "dt:2020-06-24 11:01:59",
"id" : 7679741691,
},

# blog URL in its <blog_name>.lofter.com form, limited to a range of 25
{
"#url" : "https://gengar563.lofter.com/",
"#class": lofter.LofterBlogPostsExtractor,
"#range": "1-25",
"#count": 25,

"blog_name": "gengar563",
"date" : "type:datetime",
"id" : int,
},

# blog URL in its www.lofter.com/front/blog/home-page/<blog_name> form;
# only the extractor class is specified here
{
"#url" : "https://www.lofter.com/front/blog/home-page/gengar563",
"#class": lofter.LofterBlogPostsExtractor,
},

)

0 comments on commit 63008f7

Please sign in to comment.