retry Cloudflare challenges
This commit is contained in:
@@ -13,7 +13,7 @@ import time
|
||||
import operator
|
||||
import collections
|
||||
import urllib.parse
|
||||
from . import text, exception
|
||||
from . import text
|
||||
from .cache import memcache
|
||||
|
||||
|
||||
@@ -58,14 +58,13 @@ def solve_challenge(session, response, kwargs):
|
||||
cookie.name: cookie.value
|
||||
for cookie in cf_response.cookies
|
||||
}
|
||||
|
||||
if not cookies:
|
||||
import logging
|
||||
log = logging.getLogger("cloudflare")
|
||||
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
|
||||
log.error("%s response", rtype)
|
||||
log.debug("Headers:\n%s", cf_response.headers)
|
||||
log.debug("Content:\n%s", cf_response.text)
|
||||
raise exception.StopExtraction()
|
||||
return cf_response, None, None
|
||||
|
||||
domain = next(iter(cf_response.cookies)).domain
|
||||
cookies["__cfduid"] = response.cookies.get("__cfduid", "")
|
||||
|
||||
@@ -99,18 +99,20 @@ class Extractor():
|
||||
return response
|
||||
if notfound and code == 404:
|
||||
raise exception.NotFoundError(notfound)
|
||||
|
||||
reason = response.reason
|
||||
if cloudflare.is_challenge(response):
|
||||
self.log.info("Solving Cloudflare challenge")
|
||||
response, domain, cookies = cloudflare.solve_challenge(
|
||||
session, response, kwargs)
|
||||
if response.status_code >= 400:
|
||||
continue
|
||||
cloudflare.cookies.update(self.category, (domain, cookies))
|
||||
return response
|
||||
if cookies:
|
||||
cloudflare.cookies.update(
|
||||
self.category, (domain, cookies))
|
||||
return response
|
||||
if cloudflare.is_captcha(response):
|
||||
self.log.warning("Cloudflare CAPTCHA")
|
||||
|
||||
msg = "'{} {}' for '{}'".format(code, response.reason, url)
|
||||
msg = "'{} {}' for '{}'".format(code, reason, url)
|
||||
if code < 500 and code != 429 and code != 430:
|
||||
break
|
||||
|
||||
|
||||
Reference in New Issue
Block a user