From d2cad599f7af00d4713fb9ba29d77f3772701c7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 25 Feb 2025 20:47:31 +0100 Subject: [PATCH] [twitter] support 'grok' cards content (#7040) --- gallery_dl/extractor/twitter.py | 13 ++++++++++++- test/results/twitter.py | 11 ++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 3bc6ec1e..c391bade 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -234,6 +234,13 @@ class TwitterExtractor(Extractor): for fmt in self._size_fallback: yield base + fmt + def _extract_components(self, tweet, data, files): + for component_id in data["components"]: + com = data["component_objects"][component_id] + for conv in com["data"]["conversation_preview"]: + for url in conv.get("mediaUrls") or (): + files.append({"url": url}) + def _extract_card(self, tweet, files): card = tweet["card"] if "legacy" in card: @@ -272,7 +279,11 @@ class TwitterExtractor(Extractor): return elif name == "unified_card": data = util.json_loads(bvals["unified_card"]["string_value"]) - self._extract_media(tweet, data["media_entities"].values(), files) + if "media_entities" in data: + self._extract_media( + tweet, data["media_entities"].values(), files) + if "component_objects" in data: + self._extract_components(tweet, data, files) return if self.cards == "ytdl": diff --git a/test/results/twitter.py b/test/results/twitter.py index 6da5d68e..ef47a304 100644 --- a/test/results/twitter.py +++ b/test/results/twitter.py @@ -628,7 +628,7 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi "#comment" : "'birdwatch' note (#5317)", "#category": ("", "twitter", "tweet"), "#class" : twitter.TwitterTweetExtractor, - "#options" : {"text-tweets": True}, + "#options" : {"text-tweets": True}, "birdwatch": "In addition to the known harm of lead exposure, especially to children, Mr. Kobach is incorrect when he states the mandate is unfunded. In fact, the BIPARTISAN Infrastructure Law Joe Biden signed into law in Nov 2021 provides $15B toward lead service line replacement projects. epa.gov/ground-water-a…", "content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents – all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule", @@ -642,6 +642,15 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi "#count" : 0, }, +{ + "#url" : "https://x.com/fw_rion_/status/1866737025824829544", + "#comment" : "grok share (#7040)", + "#category": ("", "twitter", "tweet"), + "#class" : twitter.TwitterTweetExtractor, + "#options" : {"cards": True}, + "#urls" : "https://pbs.twimg.com/grok-img-share/1866736156786008064.jpg", +}, + { "#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes", "#category": ("", "twitter", "quotes"),