From 3eb352fcb0f2b81f867f2fb9e27d5d3c7f509ab6 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Fri, 23 Dec 2022 18:16:34 +0800 Subject: [PATCH 1/3] [twitter] force HTTPS for TwitPic URLs --- gallery_dl/extractor/twitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 22aa78e6..4a7740ad 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -221,7 +221,8 @@ class TwitterExtractor(Extractor): for url in tweet["entities"].get("urls", ()): url = url["expanded_url"] if "//twitpic.com/" in url and "/photos/" not in url: - response = self.request(url, fatal=False) + response = self.request( + url.replace("http:", "https:", 1), fatal=False) if response.status_code >= 400: continue url = text.extr( From 38786a95937e96e39c5bee1cca5574deeaf2f3e1 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Tue, 27 Dec 2022 12:23:12 +0800 Subject: [PATCH 2/3] [twitter] refactor extraction of TwitPic URLs flattening --- gallery_dl/extractor/twitter.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 4a7740ad..a2e414e4 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -220,15 +220,14 @@ class TwitterExtractor(Extractor): def _extract_twitpic(self, tweet, files): for url in tweet["entities"].get("urls", ()): url = url["expanded_url"] - if "//twitpic.com/" in url and "/photos/" not in url: - response = self.request( - url.replace("http:", "https:", 1), fatal=False) - if response.status_code >= 400: - continue - url = text.extr( - response.text, 'name="twitter:image" value="', '"') - if url: - files.append({"url": url}) + if "//twitpic.com/" not in url or "/photos/" in url: + continue + resp = self.request(url.replace("http:", "https:", 1), fatal=False) + if resp.status_code >= 400: + continue + url = text.extr(resp.text, 'name="twitter:image" value="', '"') + if url: + files.append({"url": url}) def _transform_tweet(self, tweet): if "author" in tweet: From e18482e9aed553cfb42410ca1e5292584bceb8ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 5 Jan 2023 14:55:55 +0100 Subject: [PATCH 3/3] [twitter] improve 'http' -> 'https' replacement --- gallery_dl/extractor/twitter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index a2e414e4..7dbc63dd 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -222,10 +222,12 @@ class TwitterExtractor(Extractor): url = url["expanded_url"] if "//twitpic.com/" not in url or "/photos/" in url: continue - resp = self.request(url.replace("http:", "https:", 1), fatal=False) - if resp.status_code >= 400: + if url.startswith("http:"): + url = "https" + url[4:] + response = self.request(url, fatal=False) + if response.status_code >= 400: continue - url = text.extr(resp.text, 'name="twitter:image" value="', '"') + url = text.extr(response.text, 'name="twitter:image" value="', '"') if url: files.append({"url": url})