From c7d17f1111344e7cfd692dffdddd733bdce320c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 10 Feb 2024 00:01:55 +0100 Subject: [PATCH] [bluesky] extract 'hashtags', 'mentions', and 'uris' metadata (#4438) --- gallery_dl/extractor/bluesky.py | 15 +++++++++++ test/results/bluesky.py | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index bd183bd5be1..77b757693b2 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -45,6 +45,21 @@ def items(self): if "images" in media: images = media["images"] + if "facets" in post: + post["hashtags"] = tags = [] + post["mentions"] = dids = [] + post["uris"] = uris = [] + for facet in post["facets"]: + features = facet["features"][0] + if "tag" in features: + tags.append(features["tag"]) + elif "did" in features: + dids.append(features["did"]) + elif "uri" in features: + uris.append(features["uri"]) + else: + post["hashtags"] = post["mentions"] = post["uris"] = () + post["post_id"] = post["uri"].rpartition("/")[2] post["count"] = len(images) post["date"] = text.parse_datetime( diff --git a/test/results/bluesky.py b/test/results/bluesky.py index 7b9278abd87..9964433293f 100644 --- a/test/results/bluesky.py +++ b/test/results/bluesky.py @@ -39,6 +39,51 @@ "repostCount": int, "uri" : "at://did:plc:z72i7hdynmk6r22z27h6tvur/app.bsky.feed.post/3kh5rarr3gn2n", "width" : 1200, + "hashtags" : [], + "mentions" : [], + "uris" : ["https://blueskyweb.xyz/blog/12-21-2023-butterfly"], +}, + +{ + "#url" : "https://bsky.app/profile/mikf.bsky.social/post/3kkzc3xaf5m2w", + "#category": ("", "bluesky", "post"), + "#class" : bluesky.BlueskyPostExtractor, + "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi", + "#sha1_content": "9cf5748f6d00aae83fbb3cc2c6eb3caa832b90f4", + + "author": { + "did" : "did:plc:cslxjqkeexku6elp5xowxkq7", + "displayName": "mikf", + "handle" : "mikf.bsky.social", + "labels" : [], + }, + "cid" : "bafyreihtck7clocti2qshaiounadof74pxqhz7gnvbstxujqzhlodigqru", + "count" : 1, + "createdAt" : "2024-02-09T21:57:31.917Z", + "date" : "dt:2024-02-09 21:57:31", + "description": "reading lewd books", + "extension" : "jpeg", + "filename" : "bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi", + "hashtags" : [ + "patchouli", + "patchy", + ], + "mentions" : [ + "did:plc:cslxjqkeexku6elp5xowxkq7", + ], + "uris" : [ + "https://seiga.nicovideo.jp/seiga/im5977527", + ], + "width" : 1024, + "height" : 768, + "langs" : ["en"], + "likeCount" : int, + "num" : 1, + "post_id" : "3kkzc3xaf5m2w", + "replyCount" : int, + "repostCount": int, + "text" : "testing \"facets\"\n\nsource: seiga.nicovideo.jp/seiga/im5977...\n#patchouli #patchy\n@mikf.bsky.social", + "uri" : "at://did:plc:cslxjqkeexku6elp5xowxkq7/app.bsky.feed.post/3kkzc3xaf5m2w", }, )