[twitter] support 'article' media (#8995)

This commit is contained in:
Mike Fährmann
2026-02-06 19:52:33 +01:00
parent 7a98a93a8e
commit 98ef34a9be
3 changed files with 46 additions and 0 deletions

View File

@@ -6355,6 +6355,16 @@ Description
Fetch media from promoted Tweets.
extractor.twitter.articles
--------------------------
Type
``bool``
Default
``true``
Description
Download the cover image and other media embedded in Tweet articles.
extractor.twitter.cards
-----------------------
Type

View File

@@ -873,6 +873,7 @@
"cookies" : null,
"ads" : false,
"articles" : true,
"cards" : false,
"cards-blacklist": [],
"csrf" : "cookies",

View File

@@ -37,6 +37,7 @@ class TwitterExtractor(Extractor):
def _init(self):
self.unavailable = self.config("unavailable", False)
self.textonly = self.config("text-tweets", False)
self.articles = self.config("articles", True)
self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
@@ -159,6 +160,15 @@ class TwitterExtractor(Extractor):
"%s: Error while extracting Card files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
if self.articles and "article" in tweet:
try:
self._extract_article(tweet, files)
except Exception as exc:
self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting article files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
if self.twitpic:
try:
self._extract_twitpic(data, files)
@@ -319,6 +329,31 @@ class TwitterExtractor(Extractor):
url = f"ytdl:{self.root}/i/web/status/{tweet_id}"
files.append({"url": url})
def _extract_article(self, tweet, files):
    """Append the media files of a Tweet's article to 'files'.

    Reads the article object from
    tweet["article"]["article_results"]["result"] and appends one
    file dict per media object: the optional 'cover_media' first
    (type "article:cover"), followed by every entry of
    'media_entities' (type "article:media").

    Args:
        tweet: Tweet object containing an "article" key.
        files: List that receives the generated file dicts;
            modified in place.

    Raises:
        KeyError: If the article result or a media object lacks one
            of the fields accessed below.
    """
    article = tweet["article"]["article_results"]["result"]

    # collect (media object, file type) pairs: cover first, then body media
    sources = []
    if cover := article.get("cover_media"):
        sources.append((cover, "article:cover"))
    # 'or ()' tolerates a missing or null 'media_entities' value, so an
    # article that only provides a cover image does not raise here
    sources.extend(
        (media, "article:media")
        for media in article.get("media_entities") or ())

    for media, kind in sources:
        info = media["media_info"]
        files.append({
            "media_id" : media["media_id"],
            "media_key": media["media_key"],
            "url"      : info["original_img_url"],
            "width"    : info["original_img_width"],
            "height"   : info["original_img_height"],
            "type"     : kind,
        })
def _extract_twitpic(self, tweet, files):
urls = {}