[sexcom] prevent '.css' file downloads (#7632)
by detecting homepage redirects, and improve redirect handling in general
This commit is contained in:
@@ -66,11 +66,26 @@ class SexcomExtractor(Extractor):
|
|||||||
url = text.urljoin(self.root, text.unescape(url))
|
url = text.urljoin(self.root, text.unescape(url))
|
||||||
|
|
||||||
def _parse_pin(self, url):
|
def _parse_pin(self, url):
|
||||||
response = self.request(url, fatal=False)
|
if "/pin/" in url:
|
||||||
|
if url[-1] != "/":
|
||||||
|
url += "/"
|
||||||
|
elif url[-1] == "/":
|
||||||
|
url = url[:-1]
|
||||||
|
|
||||||
|
response = self.request(url, fatal=False, allow_redirects=False)
|
||||||
|
location = response.headers.get("location")
|
||||||
|
|
||||||
|
if location:
|
||||||
|
if location[0] == "/":
|
||||||
|
location = self.root + location
|
||||||
|
if len(location) <= 25:
|
||||||
|
return self.log.warning(
|
||||||
|
'Unable to fetch %s: Redirect to homepage', url)
|
||||||
|
response = self.request(location, fatal=False)
|
||||||
|
|
||||||
if response.status_code >= 400:
|
if response.status_code >= 400:
|
||||||
self.log.warning('Unable to fetch %s ("%s %s")',
|
return self.log.warning('Unable to fetch %s: %s %s',
|
||||||
url, response.status_code, response.reason)
|
url, response.status_code, response.reason)
|
||||||
return None
|
|
||||||
|
|
||||||
if "/pin/" in response.url:
|
if "/pin/" in response.url:
|
||||||
return self._parse_pin_legacy(response)
|
return self._parse_pin_legacy(response)
|
||||||
|
|||||||
Reference in New Issue
Block a user