From 2eb38810c5ffb7c675581543ca83091e4adbda92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 23 Oct 2019 22:18:29 +0200 Subject: [PATCH] [twitter] fix image extraction when logged in (#452) ... for individual tweets. To get a Tweet page with the old Twitter layout, an Internet Explorer User-Agent (e.g. Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko) as well as a Referer header pointing to the page itself is required. The "app_shell_visited" cookie appears to be optional at the moment, but that is what a regular web browser would send. --- gallery_dl/extractor/twitter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 28896e7b..5eb8336e 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -257,9 +257,15 @@ class TwitterTweetExtractor(TwitterExtractor): return {"user": self.user, "tweet_id": self.tweet_id} def tweets(self): - self.session.cookies.clear() url = "{}/i/web/status/{}".format(self.root, self.tweet_id) - page = self.request(url).text + cookies = {"app_shell_visited": "1"} + headers = { + "Referer" : url, + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; " + "Trident/7.0; rv:11.0) like Gecko", + } + + page = self.request(url, cookies=cookies, headers=headers).text end = page.index('class="js-tweet-stats-container') beg = page.rindex('