[twitter] support 'grok' cards content (#7040)
This commit is contained in:
@@ -234,6 +234,13 @@ class TwitterExtractor(Extractor):
|
||||
for fmt in self._size_fallback:
|
||||
yield base + fmt
|
||||
|
||||
def _extract_components(self, tweet, data, files):
|
||||
for component_id in data["components"]:
|
||||
com = data["component_objects"][component_id]
|
||||
for conv in com["data"]["conversation_preview"]:
|
||||
for url in conv.get("mediaUrls") or ():
|
||||
files.append({"url": url})
|
||||
|
||||
def _extract_card(self, tweet, files):
|
||||
card = tweet["card"]
|
||||
if "legacy" in card:
|
||||
@@ -272,7 +279,11 @@ class TwitterExtractor(Extractor):
|
||||
return
|
||||
elif name == "unified_card":
|
||||
data = util.json_loads(bvals["unified_card"]["string_value"])
|
||||
self._extract_media(tweet, data["media_entities"].values(), files)
|
||||
if "media_entities" in data:
|
||||
self._extract_media(
|
||||
tweet, data["media_entities"].values(), files)
|
||||
if "component_objects" in data:
|
||||
self._extract_components(tweet, data, files)
|
||||
return
|
||||
|
||||
if self.cards == "ytdl":
|
||||
|
||||
@@ -628,7 +628,7 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
|
||||
"#comment" : "'birdwatch' note (#5317)",
|
||||
"#category": ("", "twitter", "tweet"),
|
||||
"#class" : twitter.TwitterTweetExtractor,
|
||||
"#options" : {"text-tweets": True},
|
||||
"#options" : {"text-tweets": True},
|
||||
|
||||
"birdwatch": "In addition to the known harm of lead exposure, especially to children, Mr. Kobach is incorrect when he states the mandate is unfunded. In fact, the BIPARTISAN Infrastructure Law Joe Biden signed into law in Nov 2021 provides $15B toward lead service line replacement projects. epa.gov/ground-water-a…",
|
||||
"content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents – all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule",
|
||||
@@ -642,6 +642,15 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
|
||||
"#count" : 0,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://x.com/fw_rion_/status/1866737025824829544",
|
||||
"#comment" : "grok share (#7040)",
|
||||
"#category": ("", "twitter", "tweet"),
|
||||
"#class" : twitter.TwitterTweetExtractor,
|
||||
"#options" : {"cards": True},
|
||||
"#urls" : "https://pbs.twimg.com/grok-img-share/1866736156786008064.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
|
||||
"#category": ("", "twitter", "quotes"),
|
||||
|
||||
Reference in New Issue
Block a user