[twitter] support 'grok' cards content (#7040)

This commit is contained in:
Mike Fährmann
2025-02-25 20:47:31 +01:00
parent 876169ded5
commit d2cad599f7
2 changed files with 22 additions and 2 deletions

View File

@@ -234,6 +234,13 @@ class TwitterExtractor(Extractor):
for fmt in self._size_fallback:
yield base + fmt
def _extract_components(self, tweet, data, files):
    """Collect media URLs from the components of a unified card.

    Walks the component IDs listed in data["components"], looks each one
    up in data["component_objects"], and appends a {"url": ...} entry to
    'files' for every URL found in the "mediaUrls" list of each
    "conversation_preview" item.

    Components that have no "data" mapping or no "conversation_preview"
    entry are skipped, so one such component does not raise KeyError and
    discard the URLs of the components that follow it.

    'tweet' is accepted for signature parity with the other _extract_*
    helpers and is not used here. 'files' is modified in place; nothing
    is returned.
    """
    for component_id in data["components"]:
        component = data["component_objects"][component_id]

        # tolerate a missing or null "data" / "conversation_preview"
        previews = (component.get("data") or {}).get("conversation_preview")
        for conv in previews or ():
            # "mediaUrls" may be absent or null for previews without media
            for url in conv.get("mediaUrls") or ():
                files.append({"url": url})
def _extract_card(self, tweet, files):
card = tweet["card"]
if "legacy" in card:
@@ -272,7 +279,11 @@ class TwitterExtractor(Extractor):
return
elif name == "unified_card":
data = util.json_loads(bvals["unified_card"]["string_value"])
self._extract_media(tweet, data["media_entities"].values(), files)
if "media_entities" in data:
self._extract_media(
tweet, data["media_entities"].values(), files)
if "component_objects" in data:
self._extract_components(tweet, data, files)
return
if self.cards == "ytdl":

View File

@@ -628,7 +628,7 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"#comment" : "'birdwatch' note (#5317)",
"#category": ("", "twitter", "tweet"),
"#class" : twitter.TwitterTweetExtractor,
"#options" : {"text-tweets": True},
"#options" : {"text-tweets": True},
"birdwatch": "In addition to the known harm of lead exposure, especially to children, Mr. Kobach is incorrect when he states the mandate is unfunded. In fact, the BIPARTISAN Infrastructure Law Joe Biden signed into law in Nov 2021 provides $15B toward lead service line replacement projects. epa.gov/ground-water-a…",
"content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule",
@@ -642,6 +642,15 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"#count" : 0,
},
{
"#url" : "https://x.com/fw_rion_/status/1866737025824829544",
"#comment" : "grok share (#7040)",
"#category": ("", "twitter", "tweet"),
"#class" : twitter.TwitterTweetExtractor,
"#options" : {"cards": True},
"#urls" : "https://pbs.twimg.com/grok-img-share/1866736156786008064.jpg",
},
{
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
"#category": ("", "twitter", "quotes"),