[twitter] match and use 'i/web' status URLs

This commit is contained in:
Mike Fährmann
2019-09-24 21:18:05 +02:00
parent 5a1a0f5325
commit 66cac207ac

View File

@@ -190,7 +190,7 @@ class TwitterTweetExtractor(TwitterExtractor):
     """Extractor for images from individual tweets"""
     subcategory = "tweet"
     pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
-               r"/([^/?&#]+)/status/(\d+)")
+               r"/([^/?&#]+|i/web)/status/(\d+)")
     test = (
         ("https://twitter.com/supernaturepics/status/604341487988576256", {
             "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
@@ -217,6 +217,10 @@ class TwitterTweetExtractor(TwitterExtractor):
             "options": (("videos", True),),
             "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$",
         }),
+        # /i/web/ URL
+        ("https://twitter.com/i/web/status/1155074198240292865", {
+            "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
+        }),
     )

     def __init__(self, match):
@@ -228,7 +232,7 @@ class TwitterTweetExtractor(TwitterExtractor):

     def tweets(self):
         self.session.cookies.clear()
-        url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id)
+        url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
         page = self.request(url).text
         end = page.index('class="js-tweet-stats-container')
         beg = page.rindex('<div class="tweet ', 0, end)