[twitter] support 'grok' cards content (#7040)

This commit is contained in:
Mike Fährmann
2025-02-25 20:47:31 +01:00
parent 876169ded5
commit d2cad599f7
2 changed files with 22 additions and 2 deletions

View File

@@ -234,6 +234,13 @@ class TwitterExtractor(Extractor):
for fmt in self._size_fallback:
yield base + fmt
def _extract_components(self, tweet, data, files):
for component_id in data["components"]:
com = data["component_objects"][component_id]
for conv in com["data"]["conversation_preview"]:
for url in conv.get("mediaUrls") or ():
files.append({"url": url})
def _extract_card(self, tweet, files):
card = tweet["card"]
if "legacy" in card:
@@ -272,7 +279,11 @@ class TwitterExtractor(Extractor):
return
elif name == "unified_card":
data = util.json_loads(bvals["unified_card"]["string_value"])
self._extract_media(tweet, data["media_entities"].values(), files)
if "media_entities" in data:
self._extract_media(
tweet, data["media_entities"].values(), files)
if "component_objects" in data:
self._extract_components(tweet, data, files)
return
if self.cards == "ytdl":

View File

@@ -642,6 +642,15 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"#count" : 0,
},
{
"#url" : "https://x.com/fw_rion_/status/1866737025824829544",
"#comment" : "grok share (#7040)",
"#category": ("", "twitter", "tweet"),
"#class" : twitter.TwitterTweetExtractor,
"#options" : {"cards": True},
"#urls" : "https://pbs.twimg.com/grok-img-share/1866736156786008064.jpg",
},
{
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
"#category": ("", "twitter", "quotes"),