[twitter] extract alt texts as 'description' (closes #2617)
This commit is contained in:
@@ -104,6 +104,7 @@ class TwitterExtractor(Extractor):
|
|||||||
|
|
||||||
def _extract_media(self, tweet, entities, files):
|
def _extract_media(self, tweet, entities, files):
|
||||||
for media in entities:
|
for media in entities:
|
||||||
|
descr = media.get("ext_alt_text")
|
||||||
width = media["original_info"].get("width", 0)
|
width = media["original_info"].get("width", 0)
|
||||||
height = media["original_info"].get("height", 0)
|
height = media["original_info"].get("height", 0)
|
||||||
|
|
||||||
@@ -112,9 +113,10 @@ class TwitterExtractor(Extractor):
|
|||||||
files.append({
|
files.append({
|
||||||
"url": "ytdl:{}/i/web/status/{}".format(
|
"url": "ytdl:{}/i/web/status/{}".format(
|
||||||
self.root, tweet["id_str"]),
|
self.root, tweet["id_str"]),
|
||||||
"width" : width,
|
"width" : width,
|
||||||
"height" : height,
|
"height" : height,
|
||||||
"extension": None,
|
"extension" : None,
|
||||||
|
"description": descr,
|
||||||
})
|
})
|
||||||
elif self.videos:
|
elif self.videos:
|
||||||
video_info = media["video_info"]
|
video_info = media["video_info"]
|
||||||
@@ -123,22 +125,24 @@ class TwitterExtractor(Extractor):
|
|||||||
key=lambda v: v.get("bitrate", 0),
|
key=lambda v: v.get("bitrate", 0),
|
||||||
)
|
)
|
||||||
files.append({
|
files.append({
|
||||||
"url" : variant["url"],
|
"url" : variant["url"],
|
||||||
"width" : width,
|
"width" : width,
|
||||||
"height" : height,
|
"height" : height,
|
||||||
"bitrate" : variant.get("bitrate", 0),
|
"bitrate" : variant.get("bitrate", 0),
|
||||||
"duration": video_info.get(
|
"duration" : video_info.get(
|
||||||
"duration_millis", 0) / 1000,
|
"duration_millis", 0) / 1000,
|
||||||
|
"description": descr,
|
||||||
})
|
})
|
||||||
elif "media_url_https" in media:
|
elif "media_url_https" in media:
|
||||||
url = media["media_url_https"]
|
url = media["media_url_https"]
|
||||||
base, _, fmt = url.rpartition(".")
|
base, _, fmt = url.rpartition(".")
|
||||||
base += "?format=" + fmt + "&name="
|
base += "?format=" + fmt + "&name="
|
||||||
files.append(text.nameext_from_url(url, {
|
files.append(text.nameext_from_url(url, {
|
||||||
"url" : base + self._size_image,
|
"url" : base + self._size_image,
|
||||||
"width" : width,
|
"width" : width,
|
||||||
"height" : height,
|
"height" : height,
|
||||||
"_fallback": self._image_fallback(base),
|
"_fallback" : self._image_fallback(base),
|
||||||
|
"description": descr,
|
||||||
}))
|
}))
|
||||||
else:
|
else:
|
||||||
files.append({"url": media["media_url"]})
|
files.append({"url": media["media_url"]})
|
||||||
@@ -711,6 +715,10 @@ class TwitterTweetExtractor(TwitterExtractor):
|
|||||||
"options": (("syndication", True),),
|
"options": (("syndication", True),),
|
||||||
"count": 1,
|
"count": 1,
|
||||||
}),
|
}),
|
||||||
|
# media alt texts / descriptions (#2617)
|
||||||
|
("https://twitter.com/my0nruri/status/1528379296041299968", {
|
||||||
|
"keyword": {"description": "oc"}
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
|||||||
Reference in New Issue
Block a user