merge #3449: [twitter] force HTTPS for TwitPic URLs

This commit is contained in:
Mike Fährmann
2023-01-05 14:57:03 +01:00

View File

@@ -221,14 +221,16 @@ class TwitterExtractor(Extractor):
# Walk the URL entities of a tweet, fetch every TwitPic page link that is
# not a "/photos/" URL, and append the image URL found on that page to
# `files` as {"url": ...}.
#
# NOTE(review): this span is a diff hunk rendered without +/- markers and
# without indentation. The line counts in the hunk header above (14 old,
# 16 new) are consistent with the first three statements being shared
# context, the next eight lines being the removed version of the loop body,
# and the ten lines after them being the added version. Confirm against the
# repository before treating either half as the current code.
def _extract_twitpic(self, tweet, files):
# A tweet without a "urls" entity yields an empty tuple, so the loop is
# simply skipped.
for url in tweet["entities"].get("urls", ()):
# `url` is rebound from the entity dict to its expanded URL string.
url = url["expanded_url"]
# --- presumably the removed version: all work nested under one test ---
# Only twitpic.com links that are not "/photos/" URLs are requested.
if "//twitpic.com/" in url and "/photos/" not in url:
response = self.request(url, fatal=False)
# Pages answering with an HTTP error status are skipped.
if response.status_code >= 400:
continue
url = text.extr(
response.text, 'name="twitter:image" value="', '"')
if url:
files.append({"url": url})
# --- presumably the added version: same filter as a guard clause ---
# De Morgan inverse of the condition used above; non-matching URLs are
# skipped early instead of nesting the remaining steps.
if "//twitpic.com/" not in url or "/photos/" in url:
continue
# Upgrade plain-HTTP links: url[4:] starts at the colon, so the result
# is "https:" followed by the remainder of the original URL.
if url.startswith("http:"):
url = "https" + url[4:]
# NOTE(review): fatal=False presumably keeps a failed request from
# raising, which is why the status code is checked by hand - confirm
# against the definition of `request`.
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
# NOTE(review): text.extr presumably returns the text between the two
# marker strings (here the value of the twitter:image meta tag) and
# something falsy when the markers are absent - confirm against the helper.
url = text.extr(response.text, 'name="twitter:image" value="', '"')
# Nothing is recorded when no image URL was extracted.
if url:
files.append({"url": url})
def _transform_tweet(self, tweet):
if "author" in tweet: