[twitter] fix image extraction when logged in (#452)
... for individual tweets. To get a Tweet page with the old Twitter layout, the request must send an Internet Explorer User-Agent (e.g. Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko) together with a Referer header pointing to the page itself. The "app_shell_visited" cookie appears to be optional at the moment, but it is sent anyway because that is what a regular web browser would send.
This commit is contained in:
@@ -257,9 +257,15 @@ class TwitterTweetExtractor(TwitterExtractor):
|
|||||||
return {"user": self.user, "tweet_id": self.tweet_id}
|
return {"user": self.user, "tweet_id": self.tweet_id}
|
||||||
|
|
||||||
def tweets(self):
|
def tweets(self):
|
||||||
self.session.cookies.clear()
|
|
||||||
url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
|
url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
|
||||||
page = self.request(url).text
|
cookies = {"app_shell_visited": "1"}
|
||||||
|
headers = {
|
||||||
|
"Referer" : url,
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; "
|
||||||
|
"Trident/7.0; rv:11.0) like Gecko",
|
||||||
|
}
|
||||||
|
|
||||||
|
page = self.request(url, cookies=cookies, headers=headers).text
|
||||||
end = page.index('class="js-tweet-stats-container')
|
end = page.index('class="js-tweet-stats-container')
|
||||||
beg = page.rindex('<div class="tweet ', 0, end)
|
beg = page.rindex('<div class="tweet ', 0, end)
|
||||||
return (page[beg:end],)
|
return (page[beg:end],)
|
||||||
|
|||||||
Reference in New Issue
Block a user