Skip to content

Commit

Permalink
[twitter] improve 'cards-blacklist' (#2875)
Browse files Browse the repository at this point in the history
allow blacklisting cards by domain and by 'name:domain',
where 'domain' is taken from a card's 'vanity_url' value
  • Loading branch information
mikf committed Sep 17, 2022
1 parent aaf6992 commit e99a9b2
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 20 deletions.
10 changes: 8 additions & 2 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2362,9 +2362,15 @@ extractor.twitter.cards-blacklist
Type
``list`` of ``strings``
Example
``["player", "summary"]``
``["summary", "youtube.com", "player:twitch.tv"]``
Description
List of card types to ignore
List of card types to ignore.

Possible values are:

* card names (e.g. ``summary``)
* card domains (e.g. ``youtube.com``)
* ``<card name>:<card domain>`` (e.g. ``player:twitch.tv``)


extractor.twitter.conversations
Expand Down
39 changes: 21 additions & 18 deletions gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, match):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self.cards_blacklist = self.config("cards-blacklist") or ()
self.cards_blacklist = self.config("cards-blacklist")
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
Expand Down Expand Up @@ -180,16 +180,21 @@ def _extract_card(self, tweet, files):
card = card["legacy"]

name = card["name"].rpartition(":")[2]
if name in self.cards_blacklist:
return
bvals = card["binding_values"]
if isinstance(bvals, list):
bvals = {bval["key"]: bval["value"]
for bval in card["binding_values"]}

cbl = self.cards_blacklist
if cbl:
if name in cbl:
return
if "vanity_url" in bvals:
domain = bvals["vanity_url"]["string_value"]
if domain in cbl or name + ":" + domain in cbl:
return

if name in ("summary", "summary_large_image"):
bvals = card["binding_values"]
if isinstance(bvals, list):
bvals = {
bval["key"]: bval["value"]
for bval in card["binding_values"]
}
for prefix in ("photo_image_full_size_",
"summary_photo_image_",
"thumbnail_image_"):
Expand All @@ -206,15 +211,7 @@ def _extract_card(self, tweet, files):
files.append(value)
return
elif name == "unified_card":
bvals = card["binding_values"]
if isinstance(bvals, list):
for bval in card["binding_values"]:
if bval["key"] == "unified_card":
bval = bval["value"]["string_value"]
break
else:
bval = bvals["unified_card"]["string_value"]
data = json.loads(bval)
data = json.loads(bvals["unified_card"]["string_value"])
self._extract_media(tweet, data["media_entities"].values(), files)
return

Expand Down Expand Up @@ -761,6 +758,12 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1466183847628865544", {
"count": 0,
}),
# 'cards-blacklist' option
("https://twitter.com/i/web/status/1571141912295243776", {
"options": (("cards", "ytdl"),
("cards-blacklist", ("twitch.tv",))),
"count": 0,
}),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
"options": (("retweets", "original"),),
Expand Down

0 comments on commit e99a9b2

Please sign in to comment.