[twitter] support 'article' media (#8995)
This commit is contained in:
@@ -6355,6 +6355,16 @@ Description
|
||||
Fetch media from promoted Tweets.
|
||||
|
||||
|
||||
extractor.twitter.articles
|
||||
--------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Download the cover image and inline media embedded in Tweet articles.
|
||||
|
||||
|
||||
extractor.twitter.cards
|
||||
-----------------------
|
||||
Type
|
||||
|
||||
@@ -873,6 +873,7 @@
|
||||
"cookies" : null,
|
||||
|
||||
"ads" : false,
|
||||
"articles" : true,
|
||||
"cards" : false,
|
||||
"cards-blacklist": [],
|
||||
"csrf" : "cookies",
|
||||
|
||||
@@ -37,6 +37,7 @@ class TwitterExtractor(Extractor):
|
||||
def _init(self):
|
||||
self.unavailable = self.config("unavailable", False)
|
||||
self.textonly = self.config("text-tweets", False)
|
||||
self.articles = self.config("articles", True)
|
||||
self.retweets = self.config("retweets", False)
|
||||
self.replies = self.config("replies", True)
|
||||
self.twitpic = self.config("twitpic", False)
|
||||
@@ -159,6 +160,15 @@ class TwitterExtractor(Extractor):
|
||||
"%s: Error while extracting Card files (%s: %s)",
|
||||
data["id_str"], exc.__class__.__name__, exc)
|
||||
|
||||
if self.articles and "article" in tweet:
|
||||
try:
|
||||
self._extract_article(tweet, files)
|
||||
except Exception as exc:
|
||||
self.log.traceback(exc)
|
||||
self.log.warning(
|
||||
"%s: Error while extracting article files (%s: %s)",
|
||||
data["id_str"], exc.__class__.__name__, exc)
|
||||
|
||||
if self.twitpic:
|
||||
try:
|
||||
self._extract_twitpic(data, files)
|
||||
@@ -319,6 +329,31 @@ class TwitterExtractor(Extractor):
|
||||
url = f"ytdl:{self.root}/i/web/status/{tweet_id}"
|
||||
files.append({"url": url})
|
||||
|
||||
def _extract_article(self, tweet, files):
|
||||
article = tweet["article"]["article_results"]["result"]
|
||||
|
||||
if media := article.get("cover_media"):
|
||||
info = media["media_info"]
|
||||
files.append({
|
||||
"media_id" : media["media_id"],
|
||||
"media_key": media["media_key"],
|
||||
"url" : info["original_img_url"],
|
||||
"width" : info["original_img_width"],
|
||||
"height" : info["original_img_height"],
|
||||
"type" : "article:cover",
|
||||
})
|
||||
|
||||
for media in article["media_entities"]:
|
||||
info = media["media_info"]
|
||||
files.append({
|
||||
"media_id" : media["media_id"],
|
||||
"media_key": media["media_key"],
|
||||
"url" : info["original_img_url"],
|
||||
"width" : info["original_img_width"],
|
||||
"height" : info["original_img_height"],
|
||||
"type" : "article:cover",
|
||||
})
|
||||
|
||||
def _extract_twitpic(self, tweet, files):
|
||||
urls = {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user